{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use Soft Actor-Critic with Auto $\\alpha$ Tuning to Play LunarLanderContinuous-v2\n",
    "\n",
    "TensorFlow version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import sys\n",
    "import logging\n",
    "import itertools\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "import matplotlib.pyplot as plt\n",
    "import tensorflow.compat.v2 as tf\n",
    "tf.random.set_seed(0)\n",
    "from tensorflow import keras\n",
    "from tensorflow import nn\n",
    "from tensorflow import optimizers\n",
    "from tensorflow import losses\n",
    "from tensorflow.keras import layers\n",
    "from tensorflow.keras import models\n",
    "from tensorflow_probability import distributions\n",
    "\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "08:13:48 [INFO] env: <LunarLanderContinuous<LunarLanderContinuous-v2>>\n",
      "08:13:48 [INFO] action_space: Box(-1.0, 1.0, (2,), float32)\n",
      "08:13:48 [INFO] observation_space: Box(-inf, inf, (8,), float32)\n",
      "08:13:48 [INFO] reward_range: (-inf, inf)\n",
      "08:13:48 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}\n",
      "08:13:48 [INFO] _max_episode_steps: 1000\n",
      "08:13:48 [INFO] _elapsed_steps: None\n",
      "08:13:48 [INFO] id: LunarLanderContinuous-v2\n",
      "08:13:48 [INFO] entry_point: gym.envs.box2d:LunarLanderContinuous\n",
      "08:13:48 [INFO] reward_threshold: 200\n",
      "08:13:48 [INFO] nondeterministic: False\n",
      "08:13:48 [INFO] max_episode_steps: 1000\n",
      "08:13:48 [INFO] _kwargs: {}\n",
      "08:13:48 [INFO] _env_name: LunarLanderContinuous\n"
     ]
    }
   ],
   "source": [
    "# Create the environment, fix its randomness, and log its configuration.\n",
    "env = gym.make(\"LunarLanderContinuous-v2\")\n",
    "env.seed(0)  # seed the environment for reproducibility\n",
    "for key in vars(env):\n",
    "    logging.info('%s: %s', key, vars(env)[key])\n",
    "for key in vars(env.spec):\n",
    "    logging.info('%s: %s', key, vars(env.spec)[key])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DQNReplayer:\n",
    "    \"\"\"Fixed-capacity circular replay buffer backed by a pandas DataFrame.\n",
    "\n",
    "    Stores (state, action, reward, next_state, done) transitions and\n",
    "    overwrites the oldest entry once `capacity` is reached.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, capacity):\n",
    "        self.memory = pd.DataFrame(index=range(capacity),\n",
    "                columns=['state', 'action', 'reward', 'next_state', 'done'])\n",
    "        self.i = 0  # next write position (wraps around)\n",
    "        self.count = 0  # number of stored transitions, capped at capacity\n",
    "        self.capacity = capacity\n",
    "\n",
    "    def store(self, *args):\n",
    "        \"\"\"Store one transition given as (state, action, reward, next_state, done).\"\"\"\n",
    "        self.memory.loc[self.i] = args\n",
    "        self.i = (self.i + 1) % self.capacity  # circular overwrite\n",
    "        self.count = min(self.count + 1, self.capacity)\n",
    "\n",
    "    def sample(self, size):\n",
    "        \"\"\"Uniformly sample `size` transitions (with replacement).\n",
    "\n",
    "        Returns a generator yielding one stacked ndarray per column,\n",
    "        in column order: states, actions, rewards, next_states, dones.\n",
    "        \"\"\"\n",
    "        indices = np.random.choice(self.count, size=size)\n",
    "        return (np.stack(self.memory.loc[indices, field]) for field in\n",
    "                self.memory.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SACAgent:\n",
    "    \"\"\"Soft Actor-Critic agent with automatic temperature (alpha) tuning.\n",
    "\n",
    "    Networks: a tanh-squashed Gaussian actor, twin Q critics, and a V\n",
    "    critic with a Polyak-averaged target copy. The entropy temperature\n",
    "    alpha is optimized through its logarithm so it stays positive.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, env):\n",
    "        state_dim = env.observation_space.shape[0]\n",
    "        action_dim = env.action_space.shape[0]\n",
    "        self.action_low = env.action_space.low\n",
    "        self.action_high = env.action_space.high\n",
    "        self.gamma = 0.99  # discount factor\n",
    "\n",
    "        self.replayer = DQNReplayer(100000)\n",
    "\n",
    "        # create alpha: optimize ln(alpha) so alpha = exp(ln_alpha) > 0\n",
    "        self.target_entropy = -action_dim  # heuristic from the SAC paper\n",
    "        self.ln_alpha_tensor = tf.Variable(0., dtype=tf.float32)\n",
    "        # Keras optimizers take only hyperparameters at construction;\n",
    "        # the variables are supplied later via apply_gradients().\n",
    "        self.alpha_optimizer = optimizers.Adam(3e-4)\n",
    "\n",
    "        # create actor: outputs [mean, ln_std], both squashed by tanh\n",
    "        self.actor_net = self.build_net(input_size=state_dim,\n",
    "                hidden_sizes=[256, 256], output_size=action_dim*2,\n",
    "                output_activation=tf.tanh)\n",
    "\n",
    "        # create V critic; clone_model() does NOT copy weights, so\n",
    "        # explicitly start the target net equal to the evaluate net\n",
    "        self.v_evaluate_net = self.build_net(input_size=state_dim,\n",
    "                hidden_sizes=[256, 256])\n",
    "        self.v_target_net = models.clone_model(self.v_evaluate_net)\n",
    "        self.v_target_net.set_weights(self.v_evaluate_net.get_weights())\n",
    "\n",
    "        # create twin Q critics (clipped double-Q reduces overestimation)\n",
    "        self.q0_net = self.build_net(input_size=state_dim+action_dim,\n",
    "                hidden_sizes=[256, 256])\n",
    "        self.q1_net = self.build_net(input_size=state_dim+action_dim,\n",
    "                hidden_sizes=[256, 256])\n",
    "\n",
    "    def build_net(self, input_size, hidden_sizes, output_size=1,\n",
    "                activation=nn.relu, output_activation=None,\n",
    "                loss=losses.mse, learning_rate=3e-4):\n",
    "        \"\"\"Build and compile a fully connected network.\"\"\"\n",
    "        model = keras.Sequential()\n",
    "        for layer, hidden_size in enumerate(hidden_sizes):\n",
    "            kwargs = {'input_shape' : (input_size,)} if layer == 0 else {}\n",
    "            model.add(layers.Dense(units=hidden_size,\n",
    "                    activation=activation, **kwargs))\n",
    "        model.add(layers.Dense(units=output_size,\n",
    "                activation=output_activation))\n",
    "        optimizer = optimizers.Adam(learning_rate)\n",
    "        model.compile(optimizer=optimizer, loss=loss)\n",
    "        return model\n",
    "\n",
    "    def get_action_ln_prob_tensors(self, state_tensor):\n",
    "        \"\"\"Return (action, ln_prob) tensors for a batch of states.\n",
    "\n",
    "        In 'train' mode actions are sampled from the squashed Gaussian\n",
    "        policy; otherwise the deterministic mean action is returned and\n",
    "        the log probabilities are dummy ones (unused by callers).\n",
    "        \"\"\"\n",
    "        mean_ln_std_tensor = self.actor_net(state_tensor)\n",
    "        mean_tensor, ln_std_tensor = tf.split(mean_ln_std_tensor, 2, axis=-1)\n",
    "        if self.mode == 'train':\n",
    "            std_tensor = tf.math.exp(ln_std_tensor)\n",
    "            normal_dist = distributions.Normal(mean_tensor, std_tensor)\n",
    "            sample_tensor = normal_dist.sample()\n",
    "            action_tensor = tf.tanh(sample_tensor)\n",
    "            # change-of-variables correction for the tanh squashing:\n",
    "            # ln pi(a) = ln N(u) - ln(1 - tanh(u)^2); 1e-6 avoids log(0)\n",
    "            ln_prob_tensor = normal_dist.log_prob(sample_tensor) - \\\n",
    "                    tf.math.log1p(1e-6 - tf.pow(action_tensor, 2))\n",
    "            ln_prob_tensor = tf.reduce_sum(ln_prob_tensor, axis=-1, keepdims=True)\n",
    "        else:\n",
    "            action_tensor = tf.tanh(mean_tensor)\n",
    "            ln_prob_tensor = tf.ones_like(action_tensor)\n",
    "        return action_tensor, ln_prob_tensor\n",
    "\n",
    "    def reset(self, mode):\n",
    "        \"\"\"Start an episode in 'train' or 'test' mode.\"\"\"\n",
    "        self.mode = mode\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        \"\"\"Choose an action; in train mode also store transitions and learn.\"\"\"\n",
    "        if self.mode == 'train' and self.replayer.count < 5000:\n",
    "            # warm-up: act uniformly at random until the buffer has data\n",
    "            action = np.random.uniform(self.action_low, self.action_high)\n",
    "        else:\n",
    "            state_tensor = tf.convert_to_tensor(observation[np.newaxis, :],\n",
    "                    dtype=tf.float32)\n",
    "            action_tensor, _ = self.get_action_ln_prob_tensors(state_tensor)\n",
    "            action = action_tensor[0].numpy()\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "            if len(self.trajectory) >= 8:\n",
    "                # last 8 entries hold two consecutive steps, i.e. one\n",
    "                # complete (s, a, r, s', done) transition\n",
    "                state, _, _, act, next_state, reward, done, _ = \\\n",
    "                        self.trajectory[-8:]\n",
    "                self.replayer.store(state, act, reward, next_state, done)\n",
    "            if self.replayer.count >= 120:\n",
    "                self.learn()\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        pass\n",
    "\n",
    "    def update_net(self, target_net, evaluate_net, learning_rate=0.005):\n",
    "        \"\"\"Polyak-average the evaluate net weights into the target net.\"\"\"\n",
    "        average_weights = [(1. - learning_rate) * t + learning_rate * e for t, e\n",
    "                in zip(target_net.get_weights(), evaluate_net.get_weights())]\n",
    "        target_net.set_weights(average_weights)\n",
    "\n",
    "    def learn(self):\n",
    "        \"\"\"One SAC update: temperature, twin Q critics, V critic, actor.\"\"\"\n",
    "        states, actions, rewards, next_states, dones = self.replayer.sample(128)\n",
    "        state_tensor = tf.convert_to_tensor(states, dtype=tf.float32)\n",
    "\n",
    "        # train alpha towards the target entropy; reduce the loss to a\n",
    "        # scalar mean so the gradient is not implicitly summed over the\n",
    "        # batch (which would scale the temperature step by batch size)\n",
    "        act_tensor, ln_prob_tensor = self.get_action_ln_prob_tensors(state_tensor)\n",
    "        with tf.GradientTape() as tape:\n",
    "            alpha_loss_tensor = -self.ln_alpha_tensor * tf.reduce_mean(\n",
    "                    ln_prob_tensor + self.target_entropy)\n",
    "        grads = tape.gradient(alpha_loss_tensor, [self.ln_alpha_tensor,])\n",
    "        self.alpha_optimizer.apply_gradients(zip(grads, [self.ln_alpha_tensor,]))\n",
    "\n",
    "        # train Q critics towards r + gamma * (1 - done) * V_target(s')\n",
    "        state_actions = np.concatenate((states, actions), axis=-1)\n",
    "        next_vs = self.v_target_net.predict(next_states)\n",
    "        q_targets = rewards[:, np.newaxis] + \\\n",
    "                self.gamma * (1. - dones[:, np.newaxis]) * next_vs\n",
    "        self.q0_net.fit(state_actions, q_targets, verbose=False)\n",
    "        self.q1_net.fit(state_actions, q_targets, verbose=False)\n",
    "\n",
    "        # train V critic towards min(Q0, Q1) - alpha * ln pi\n",
    "        state_act_tensor = tf.concat((state_tensor, act_tensor), axis=-1)\n",
    "        q0_pred_tensor = self.q0_net(state_act_tensor)\n",
    "        q1_pred_tensor = self.q1_net(state_act_tensor)\n",
    "        q_pred_tensor = tf.minimum(q0_pred_tensor, q1_pred_tensor)\n",
    "        alpha_tensor = tf.exp(self.ln_alpha_tensor)\n",
    "        v_target_tensor = q_pred_tensor - alpha_tensor * ln_prob_tensor\n",
    "        v_targets = v_target_tensor.numpy()\n",
    "        self.v_evaluate_net.fit(states, v_targets, verbose=False)\n",
    "        self.update_net(self.v_target_net, self.v_evaluate_net)\n",
    "\n",
    "        # train actor by minimizing alpha * ln pi(a|s) - Q0(s, a)\n",
    "        with tf.GradientTape() as tape:\n",
    "            act_tensor, ln_prob_tensor = \\\n",
    "                    self.get_action_ln_prob_tensors(state_tensor)\n",
    "            state_act_tensor = tf.concat((state_tensor, act_tensor), axis=-1)\n",
    "            q0_pred_tensor = self.q0_net(state_act_tensor)\n",
    "            alpha_tensor = tf.exp(self.ln_alpha_tensor)\n",
    "            actor_loss_tensor = tf.reduce_mean(alpha_tensor * ln_prob_tensor -\n",
    "                    q0_pred_tensor)\n",
    "        grads = tape.gradient(actor_loss_tensor,\n",
    "                self.actor_net.trainable_variables)\n",
    "        self.actor_net.optimizer.apply_gradients(\n",
    "                zip(grads, self.actor_net.trainable_variables))\n",
    "\n",
    "\n",
    "\n",
    "agent = SACAgent(env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "08:13:51 [INFO] ==== train ====\n",
      "08:14:00 [DEBUG] train episode 0: reward = -110.26, steps = 140\n",
      "08:14:51 [DEBUG] train episode 1: reward = -249.29, steps = 154\n",
      "08:15:35 [DEBUG] train episode 2: reward = -282.50, steps = 131\n",
      "08:16:10 [DEBUG] train episode 3: reward = -299.41, steps = 104\n",
      "08:16:39 [DEBUG] train episode 4: reward = -37.76, steps = 84\n",
      "08:17:08 [DEBUG] train episode 5: reward = -42.23, steps = 76\n",
      "08:17:47 [DEBUG] train episode 6: reward = -181.15, steps = 107\n",
      "08:18:24 [DEBUG] train episode 7: reward = -161.18, steps = 98\n",
      "08:18:59 [DEBUG] train episode 8: reward = -305.59, steps = 92\n",
      "08:19:34 [DEBUG] train episode 9: reward = -143.95, steps = 90\n",
      "08:20:26 [DEBUG] train episode 10: reward = -426.00, steps = 135\n",
      "08:20:52 [DEBUG] train episode 11: reward = -98.31, steps = 66\n",
      "08:21:26 [DEBUG] train episode 12: reward = -14.49, steps = 86\n",
      "08:22:04 [DEBUG] train episode 13: reward = -78.21, steps = 100\n",
      "08:22:45 [DEBUG] train episode 14: reward = -175.84, steps = 105\n",
      "08:23:15 [DEBUG] train episode 15: reward = -60.30, steps = 78\n",
      "08:23:53 [DEBUG] train episode 16: reward = -549.33, steps = 98\n",
      "08:24:42 [DEBUG] train episode 17: reward = -214.00, steps = 128\n",
      "08:25:38 [DEBUG] train episode 18: reward = -331.81, steps = 146\n",
      "08:26:12 [DEBUG] train episode 19: reward = -338.44, steps = 89\n",
      "08:27:00 [DEBUG] train episode 20: reward = -288.34, steps = 124\n",
      "08:27:47 [DEBUG] train episode 21: reward = -126.20, steps = 122\n",
      "08:28:30 [DEBUG] train episode 22: reward = -295.01, steps = 110\n",
      "08:29:21 [DEBUG] train episode 23: reward = -541.01, steps = 134\n",
      "08:29:48 [DEBUG] train episode 24: reward = -75.28, steps = 71\n",
      "08:30:48 [DEBUG] train episode 25: reward = -116.77, steps = 154\n",
      "08:31:45 [DEBUG] train episode 26: reward = -372.92, steps = 151\n",
      "08:32:32 [DEBUG] train episode 27: reward = -253.98, steps = 121\n",
      "08:33:14 [DEBUG] train episode 28: reward = -69.39, steps = 110\n",
      "08:33:53 [DEBUG] train episode 29: reward = -288.25, steps = 101\n",
      "08:34:32 [DEBUG] train episode 30: reward = -352.66, steps = 100\n",
      "08:35:02 [DEBUG] train episode 31: reward = -87.67, steps = 77\n",
      "08:35:35 [DEBUG] train episode 32: reward = -367.38, steps = 86\n",
      "08:36:19 [DEBUG] train episode 33: reward = -72.54, steps = 114\n",
      "08:36:55 [DEBUG] train episode 34: reward = -130.19, steps = 92\n",
      "08:37:41 [DEBUG] train episode 35: reward = -206.35, steps = 121\n",
      "08:38:19 [DEBUG] train episode 36: reward = -88.64, steps = 97\n",
      "08:39:03 [DEBUG] train episode 37: reward = -198.04, steps = 115\n",
      "08:39:30 [DEBUG] train episode 38: reward = -103.69, steps = 71\n",
      "08:39:58 [DEBUG] train episode 39: reward = -135.01, steps = 70\n",
      "08:40:44 [DEBUG] train episode 40: reward = -173.38, steps = 121\n",
      "08:41:28 [DEBUG] train episode 41: reward = -417.42, steps = 113\n",
      "08:42:22 [DEBUG] train episode 42: reward = -251.79, steps = 147\n",
      "08:42:51 [DEBUG] train episode 43: reward = -422.75, steps = 82\n",
      "08:43:31 [DEBUG] train episode 44: reward = -298.07, steps = 114\n",
      "08:44:15 [DEBUG] train episode 45: reward = -509.73, steps = 129\n",
      "08:45:12 [DEBUG] train episode 46: reward = -84.61, steps = 162\n",
      "08:47:29 [DEBUG] train episode 47: reward = -37.50, steps = 389\n",
      "08:48:28 [DEBUG] train episode 48: reward = 8.65, steps = 169\n",
      "08:49:42 [DEBUG] train episode 49: reward = -34.87, steps = 211\n",
      "08:51:11 [DEBUG] train episode 50: reward = -15.36, steps = 251\n",
      "08:52:10 [DEBUG] train episode 51: reward = -40.78, steps = 169\n",
      "08:53:46 [DEBUG] train episode 52: reward = -87.32, steps = 272\n",
      "08:59:37 [DEBUG] train episode 53: reward = -104.92, steps = 1000\n",
      "09:01:55 [DEBUG] train episode 54: reward = -68.44, steps = 394\n",
      "09:07:47 [DEBUG] train episode 55: reward = -82.20, steps = 1000\n",
      "09:13:37 [DEBUG] train episode 56: reward = -45.02, steps = 1000\n",
      "09:14:56 [DEBUG] train episode 57: reward = -25.66, steps = 227\n",
      "09:16:10 [DEBUG] train episode 58: reward = -29.24, steps = 211\n",
      "09:19:27 [DEBUG] train episode 59: reward = -61.69, steps = 561\n",
      "09:21:31 [DEBUG] train episode 60: reward = -104.31, steps = 348\n",
      "09:22:34 [DEBUG] train episode 61: reward = -63.95, steps = 183\n",
      "09:25:08 [DEBUG] train episode 62: reward = -238.04, steps = 438\n",
      "09:27:25 [DEBUG] train episode 63: reward = 170.66, steps = 393\n",
      "09:28:26 [DEBUG] train episode 64: reward = -144.34, steps = 175\n",
      "09:31:35 [DEBUG] train episode 65: reward = -106.30, steps = 534\n",
      "09:37:31 [DEBUG] train episode 66: reward = -30.74, steps = 1000\n",
      "09:43:25 [DEBUG] train episode 67: reward = 11.18, steps = 1000\n",
      "09:44:26 [DEBUG] train episode 68: reward = -96.97, steps = 172\n",
      "09:50:18 [DEBUG] train episode 69: reward = -26.22, steps = 1000\n",
      "09:56:11 [DEBUG] train episode 70: reward = -9.46, steps = 1000\n",
      "10:02:05 [DEBUG] train episode 71: reward = 2.29, steps = 1000\n",
      "10:08:05 [DEBUG] train episode 72: reward = -17.41, steps = 1000\n",
      "10:09:03 [DEBUG] train episode 73: reward = -78.32, steps = 163\n",
      "10:14:59 [DEBUG] train episode 74: reward = -56.47, steps = 1000\n",
      "10:17:03 [DEBUG] train episode 75: reward = -105.24, steps = 350\n",
      "10:23:01 [DEBUG] train episode 76: reward = -85.25, steps = 1000\n",
      "10:28:58 [DEBUG] train episode 77: reward = -44.23, steps = 1000\n",
      "10:34:56 [DEBUG] train episode 78: reward = -42.70, steps = 1000\n",
      "10:40:53 [DEBUG] train episode 79: reward = -4.61, steps = 1000\n",
      "10:46:54 [DEBUG] train episode 80: reward = -59.02, steps = 1000\n",
      "10:52:52 [DEBUG] train episode 81: reward = -36.24, steps = 1000\n",
      "10:58:52 [DEBUG] train episode 82: reward = -68.53, steps = 1000\n",
      "11:04:51 [DEBUG] train episode 83: reward = -40.34, steps = 1000\n",
      "11:10:05 [DEBUG] train episode 84: reward = -58.39, steps = 1000\n",
      "11:14:10 [DEBUG] train episode 85: reward = -12.41, steps = 1000\n",
      "11:18:13 [DEBUG] train episode 86: reward = -9.76, steps = 1000\n",
      "11:22:15 [DEBUG] train episode 87: reward = -67.77, steps = 1000\n",
      "11:26:19 [DEBUG] train episode 88: reward = -34.24, steps = 1000\n",
      "11:30:22 [DEBUG] train episode 89: reward = -6.51, steps = 1000\n",
      "11:34:27 [DEBUG] train episode 90: reward = -72.43, steps = 1000\n",
      "11:38:32 [DEBUG] train episode 91: reward = -31.45, steps = 1000\n",
      "11:42:36 [DEBUG] train episode 92: reward = -90.86, steps = 1000\n",
      "11:46:40 [DEBUG] train episode 93: reward = -35.35, steps = 1000\n",
      "11:50:43 [DEBUG] train episode 94: reward = -23.63, steps = 1000\n",
      "11:54:45 [DEBUG] train episode 95: reward = -40.51, steps = 1000\n",
      "11:58:49 [DEBUG] train episode 96: reward = -34.42, steps = 1000\n",
      "12:02:55 [DEBUG] train episode 97: reward = -45.70, steps = 1000\n",
      "12:06:59 [DEBUG] train episode 98: reward = -13.30, steps = 1000\n",
      "12:11:04 [DEBUG] train episode 99: reward = -22.68, steps = 1000\n",
      "12:12:34 [DEBUG] train episode 100: reward = -157.09, steps = 366\n",
      "12:16:39 [DEBUG] train episode 101: reward = -50.90, steps = 1000\n",
      "12:20:43 [DEBUG] train episode 102: reward = -33.76, steps = 1000\n",
      "12:24:49 [DEBUG] train episode 103: reward = -54.74, steps = 1000\n",
      "12:28:54 [DEBUG] train episode 104: reward = -18.99, steps = 1000\n",
      "12:33:01 [DEBUG] train episode 105: reward = -50.84, steps = 1000\n",
      "12:37:10 [DEBUG] train episode 106: reward = -59.08, steps = 1000\n",
      "12:41:19 [DEBUG] train episode 107: reward = -42.44, steps = 1000\n",
      "12:45:26 [DEBUG] train episode 108: reward = 3.06, steps = 1000\n",
      "12:49:37 [DEBUG] train episode 109: reward = -70.19, steps = 1000\n",
      "12:53:48 [DEBUG] train episode 110: reward = -18.02, steps = 1000\n",
      "12:57:58 [DEBUG] train episode 111: reward = -40.37, steps = 1000\n",
      "13:01:33 [DEBUG] train episode 112: reward = -142.23, steps = 849\n",
      "13:05:12 [DEBUG] train episode 113: reward = -128.71, steps = 870\n",
      "13:09:26 [DEBUG] train episode 114: reward = -17.06, steps = 1000\n",
      "13:13:36 [DEBUG] train episode 115: reward = -97.91, steps = 1000\n",
      "13:17:50 [DEBUG] train episode 116: reward = -99.62, steps = 1000\n",
      "13:22:11 [DEBUG] train episode 117: reward = -25.60, steps = 1000\n",
      "13:26:31 [DEBUG] train episode 118: reward = -42.49, steps = 1000\n",
      "13:30:49 [DEBUG] train episode 119: reward = -37.43, steps = 1000\n",
      "13:35:08 [DEBUG] train episode 120: reward = -74.25, steps = 1000\n",
      "13:39:24 [DEBUG] train episode 121: reward = -86.12, steps = 1000\n",
      "13:43:42 [DEBUG] train episode 122: reward = -84.45, steps = 1000\n",
      "13:47:37 [DEBUG] train episode 123: reward = 126.28, steps = 908\n",
      "13:51:56 [DEBUG] train episode 124: reward = -56.54, steps = 1000\n",
      "13:56:15 [DEBUG] train episode 125: reward = 7.61, steps = 1000\n",
      "14:00:34 [DEBUG] train episode 126: reward = -8.52, steps = 1000\n",
      "14:03:10 [DEBUG] train episode 127: reward = -49.37, steps = 600\n",
      "14:07:32 [DEBUG] train episode 128: reward = -71.91, steps = 1000\n",
      "14:08:45 [DEBUG] train episode 129: reward = -63.49, steps = 280\n",
      "14:13:09 [DEBUG] train episode 130: reward = -49.66, steps = 1000\n",
      "14:17:29 [DEBUG] train episode 131: reward = -99.76, steps = 1000\n",
      "14:21:50 [DEBUG] train episode 132: reward = -101.48, steps = 1000\n",
      "14:24:40 [DEBUG] train episode 133: reward = -94.13, steps = 651\n",
      "14:29:08 [DEBUG] train episode 134: reward = -44.32, steps = 1000\n",
      "14:33:34 [DEBUG] train episode 135: reward = -68.16, steps = 1000\n",
      "14:38:00 [DEBUG] train episode 136: reward = -14.00, steps = 1000\n",
      "14:42:24 [DEBUG] train episode 137: reward = -61.74, steps = 1000\n",
      "14:46:47 [DEBUG] train episode 138: reward = -98.26, steps = 1000\n",
      "14:51:13 [DEBUG] train episode 139: reward = -45.95, steps = 1000\n",
      "14:55:37 [DEBUG] train episode 140: reward = -11.04, steps = 1000\n",
      "15:00:03 [DEBUG] train episode 141: reward = -6.89, steps = 1000\n",
      "15:04:30 [DEBUG] train episode 142: reward = -34.49, steps = 1000\n",
      "15:08:54 [DEBUG] train episode 143: reward = -29.75, steps = 1000\n",
      "15:13:19 [DEBUG] train episode 144: reward = -7.63, steps = 1000\n",
      "15:17:46 [DEBUG] train episode 145: reward = -35.95, steps = 1000\n",
      "15:22:14 [DEBUG] train episode 146: reward = -58.01, steps = 1000\n",
      "15:26:39 [DEBUG] train episode 147: reward = -60.17, steps = 1000\n",
      "15:31:05 [DEBUG] train episode 148: reward = -12.72, steps = 1000\n",
      "15:35:34 [DEBUG] train episode 149: reward = -51.32, steps = 1000\n",
      "15:40:02 [DEBUG] train episode 150: reward = -5.12, steps = 1000\n",
      "15:44:33 [DEBUG] train episode 151: reward = -61.23, steps = 1000\n",
      "15:49:02 [DEBUG] train episode 152: reward = -37.32, steps = 1000\n",
      "15:53:32 [DEBUG] train episode 153: reward = -35.78, steps = 1000\n",
      "15:58:02 [DEBUG] train episode 154: reward = -28.23, steps = 1000\n",
      "16:02:33 [DEBUG] train episode 155: reward = -60.52, steps = 1000\n",
      "16:07:03 [DEBUG] train episode 156: reward = -28.69, steps = 1000\n",
      "16:11:36 [DEBUG] train episode 157: reward = -56.97, steps = 1000\n",
      "16:16:11 [DEBUG] train episode 158: reward = -36.27, steps = 1000\n",
      "16:20:47 [DEBUG] train episode 159: reward = -7.67, steps = 1000\n",
      "16:25:24 [DEBUG] train episode 160: reward = -29.63, steps = 1000\n",
      "16:29:59 [DEBUG] train episode 161: reward = -18.34, steps = 1000\n",
      "16:34:36 [DEBUG] train episode 162: reward = -38.78, steps = 1000\n",
      "16:39:14 [DEBUG] train episode 163: reward = -48.51, steps = 1000\n",
      "16:43:50 [DEBUG] train episode 164: reward = -42.26, steps = 1000\n",
      "16:48:27 [DEBUG] train episode 165: reward = -35.21, steps = 1000\n",
      "16:53:04 [DEBUG] train episode 166: reward = -13.84, steps = 1000\n",
      "16:57:41 [DEBUG] train episode 167: reward = -51.10, steps = 1000\n",
      "17:02:21 [DEBUG] train episode 168: reward = -24.52, steps = 1000\n",
      "17:07:02 [DEBUG] train episode 169: reward = -56.40, steps = 1000\n",
      "17:11:43 [DEBUG] train episode 170: reward = -52.76, steps = 1000\n",
      "17:16:25 [DEBUG] train episode 171: reward = -56.77, steps = 1000\n",
      "17:21:08 [DEBUG] train episode 172: reward = -28.57, steps = 1000\n",
      "17:25:47 [DEBUG] train episode 173: reward = -18.01, steps = 1000\n",
      "17:30:29 [DEBUG] train episode 174: reward = -86.60, steps = 1000\n",
      "17:35:13 [DEBUG] train episode 175: reward = -42.24, steps = 1000\n",
      "17:39:56 [DEBUG] train episode 176: reward = -64.39, steps = 1000\n",
      "17:44:39 [DEBUG] train episode 177: reward = -58.60, steps = 1000\n",
      "17:49:23 [DEBUG] train episode 178: reward = -47.75, steps = 1000\n",
      "17:54:08 [DEBUG] train episode 179: reward = -13.38, steps = 1000\n",
      "17:58:53 [DEBUG] train episode 180: reward = -20.14, steps = 1000\n",
      "18:03:37 [DEBUG] train episode 181: reward = -37.82, steps = 1000\n",
      "18:06:34 [DEBUG] train episode 182: reward = -144.37, steps = 621\n",
      "18:11:23 [DEBUG] train episode 183: reward = -25.29, steps = 1000\n",
      "18:16:08 [DEBUG] train episode 184: reward = -40.30, steps = 1000\n",
      "18:21:00 [DEBUG] train episode 185: reward = -36.23, steps = 1000\n",
      "18:24:03 [DEBUG] train episode 186: reward = -249.51, steps = 627\n",
      "18:28:53 [DEBUG] train episode 187: reward = -42.21, steps = 1000\n",
      "18:33:43 [DEBUG] train episode 188: reward = -29.54, steps = 1000\n",
      "18:38:34 [DEBUG] train episode 189: reward = -66.94, steps = 1000\n",
      "18:43:26 [DEBUG] train episode 190: reward = -36.37, steps = 1000\n",
      "18:48:19 [DEBUG] train episode 191: reward = 7.51, steps = 1000\n",
      "18:53:09 [DEBUG] train episode 192: reward = -41.29, steps = 1000\n",
      "18:58:03 [DEBUG] train episode 193: reward = -53.87, steps = 1000\n",
      "19:02:55 [DEBUG] train episode 194: reward = -25.27, steps = 1000\n",
      "19:07:50 [DEBUG] train episode 195: reward = 5.41, steps = 1000\n",
      "19:12:45 [DEBUG] train episode 196: reward = 10.69, steps = 1000\n",
      "19:17:41 [DEBUG] train episode 197: reward = -42.86, steps = 1000\n",
      "19:22:36 [DEBUG] train episode 198: reward = -19.69, steps = 1000\n",
      "19:27:31 [DEBUG] train episode 199: reward = 43.78, steps = 1000\n",
      "19:32:26 [DEBUG] train episode 200: reward = 17.24, steps = 1000\n",
      "19:37:22 [DEBUG] train episode 201: reward = -31.67, steps = 1000\n",
      "19:42:18 [DEBUG] train episode 202: reward = 1.99, steps = 1000\n",
      "19:47:14 [DEBUG] train episode 203: reward = 8.05, steps = 1000\n",
      "19:52:12 [DEBUG] train episode 204: reward = -30.82, steps = 1000\n",
      "19:57:09 [DEBUG] train episode 205: reward = 4.38, steps = 1000\n",
      "20:02:10 [DEBUG] train episode 206: reward = -15.40, steps = 1000\n",
      "20:07:13 [DEBUG] train episode 207: reward = -61.17, steps = 1000\n",
      "20:12:14 [DEBUG] train episode 208: reward = 23.43, steps = 1000\n",
      "20:17:15 [DEBUG] train episode 209: reward = 23.01, steps = 1000\n",
      "20:22:23 [DEBUG] train episode 210: reward = 6.76, steps = 1000\n",
      "20:27:27 [DEBUG] train episode 211: reward = -25.18, steps = 1000\n",
      "20:32:30 [DEBUG] train episode 212: reward = -15.10, steps = 1000\n",
      "20:37:35 [DEBUG] train episode 213: reward = 5.44, steps = 1000\n",
      "20:42:42 [DEBUG] train episode 214: reward = 28.07, steps = 1000\n",
      "20:47:49 [DEBUG] train episode 215: reward = -11.50, steps = 1000\n",
      "20:52:55 [DEBUG] train episode 216: reward = 5.54, steps = 1000\n",
      "20:58:01 [DEBUG] train episode 217: reward = 28.85, steps = 1000\n",
      "21:03:07 [DEBUG] train episode 218: reward = -24.30, steps = 1000\n",
      "21:08:15 [DEBUG] train episode 219: reward = -32.33, steps = 1000\n",
      "21:13:38 [DEBUG] train episode 220: reward = 13.72, steps = 1000\n",
      "21:19:07 [DEBUG] train episode 221: reward = -23.18, steps = 1000\n",
      "21:24:35 [DEBUG] train episode 222: reward = -73.13, steps = 1000\n",
      "21:29:59 [DEBUG] train episode 223: reward = 8.28, steps = 1000\n",
      "21:36:13 [DEBUG] train episode 224: reward = -34.46, steps = 1000\n",
      "21:42:19 [DEBUG] train episode 225: reward = -40.52, steps = 1000\n",
      "21:48:42 [DEBUG] train episode 226: reward = -27.43, steps = 1000\n",
      "21:54:43 [DEBUG] train episode 227: reward = -4.24, steps = 1000\n",
      "22:00:13 [DEBUG] train episode 228: reward = -46.48, steps = 1000\n",
      "22:05:30 [DEBUG] train episode 229: reward = 18.70, steps = 1000\n",
      "22:10:49 [DEBUG] train episode 230: reward = -37.42, steps = 1000\n",
      "22:16:10 [DEBUG] train episode 231: reward = -20.73, steps = 1000\n",
      "22:21:33 [DEBUG] train episode 232: reward = -23.47, steps = 1000\n",
      "22:26:56 [DEBUG] train episode 233: reward = 5.32, steps = 1000\n",
      "22:32:19 [DEBUG] train episode 234: reward = 20.45, steps = 1000\n",
      "22:37:43 [DEBUG] train episode 235: reward = 13.88, steps = 1000\n",
      "22:43:20 [DEBUG] train episode 236: reward = 29.19, steps = 1000\n",
      "22:48:52 [DEBUG] train episode 237: reward = -33.77, steps = 1000\n",
      "22:54:28 [DEBUG] train episode 238: reward = -13.70, steps = 1000\n",
      "23:00:10 [DEBUG] train episode 239: reward = 18.16, steps = 1000\n",
      "23:05:47 [DEBUG] train episode 240: reward = 46.52, steps = 1000\n",
      "23:11:35 [DEBUG] train episode 241: reward = 88.73, steps = 1000\n",
      "23:17:10 [DEBUG] train episode 242: reward = 36.74, steps = 1000\n",
      "23:22:51 [DEBUG] train episode 243: reward = -35.77, steps = 1000\n",
      "23:28:38 [DEBUG] train episode 244: reward = 45.93, steps = 1000\n",
      "23:29:17 [DEBUG] train episode 245: reward = 44.37, steps = 116\n",
      "23:29:48 [DEBUG] train episode 246: reward = -15.86, steps = 93\n",
      "23:35:21 [DEBUG] train episode 247: reward = -15.06, steps = 1000\n",
      "23:35:58 [DEBUG] train episode 248: reward = -67.91, steps = 109\n",
      "23:36:29 [DEBUG] train episode 249: reward = -280.83, steps = 91\n",
      "23:37:02 [DEBUG] train episode 250: reward = -218.63, steps = 98\n",
      "23:42:34 [DEBUG] train episode 251: reward = -41.45, steps = 1000\n",
      "23:48:07 [DEBUG] train episode 252: reward = -24.54, steps = 1000\n",
      "23:54:01 [DEBUG] train episode 253: reward = -135.34, steps = 1000\n",
      "23:59:45 [DEBUG] train episode 254: reward = -13.60, steps = 1000\n",
      "00:03:28 [DEBUG] train episode 255: reward = -158.07, steps = 663\n",
      "00:08:46 [DEBUG] train episode 256: reward = -222.26, steps = 939\n",
      "00:14:31 [DEBUG] train episode 257: reward = -32.22, steps = 1000\n",
      "00:17:00 [DEBUG] train episode 258: reward = -114.64, steps = 436\n",
      "00:22:41 [DEBUG] train episode 259: reward = -108.70, steps = 1000\n",
      "01:20:33 [DEBUG] train episode 260: reward = -103.78, steps = 1000\n",
      "01:23:36 [DEBUG] train episode 261: reward = -101.21, steps = 500\n",
      "01:26:43 [DEBUG] train episode 262: reward = 176.87, steps = 536\n",
      "01:32:24 [DEBUG] train episode 263: reward = 70.56, steps = 1000\n",
      "01:38:05 [DEBUG] train episode 264: reward = 53.58, steps = 1000\n",
      "01:43:44 [DEBUG] train episode 265: reward = -50.69, steps = 1000\n",
      "01:47:22 [DEBUG] train episode 266: reward = 203.64, steps = 638\n",
      "01:53:08 [DEBUG] train episode 267: reward = 86.95, steps = 1000\n",
      "01:58:53 [DEBUG] train episode 268: reward = 72.52, steps = 1000\n",
      "02:04:33 [DEBUG] train episode 269: reward = 99.74, steps = 1000\n",
      "02:09:27 [DEBUG] train episode 270: reward = 193.78, steps = 860\n",
      "02:15:11 [DEBUG] train episode 271: reward = 195.16, steps = 997\n",
      "02:18:53 [DEBUG] train episode 272: reward = 222.50, steps = 648\n",
      "02:24:36 [DEBUG] train episode 273: reward = 96.52, steps = 1000\n",
      "02:30:18 [DEBUG] train episode 274: reward = 79.26, steps = 1000\n",
      "02:33:24 [DEBUG] train episode 275: reward = 179.62, steps = 541\n",
      "02:37:32 [DEBUG] train episode 276: reward = 246.95, steps = 724\n",
      "02:40:44 [DEBUG] train episode 277: reward = 243.33, steps = 559\n",
      "02:43:09 [DEBUG] train episode 278: reward = 231.45, steps = 422\n",
      "02:46:47 [DEBUG] train episode 279: reward = 193.85, steps = 629\n",
      "02:50:15 [DEBUG] train episode 280: reward = 246.16, steps = 603\n",
      "02:52:57 [DEBUG] train episode 281: reward = 261.41, steps = 470\n",
      "02:58:49 [DEBUG] train episode 282: reward = 116.50, steps = 1000\n",
      "03:04:39 [DEBUG] train episode 283: reward = 123.79, steps = 1000\n",
      "03:07:48 [DEBUG] train episode 284: reward = 256.04, steps = 534\n",
      "03:10:49 [DEBUG] train episode 285: reward = 228.98, steps = 511\n",
      "03:16:45 [DEBUG] train episode 286: reward = -110.98, steps = 1000\n",
      "03:20:19 [DEBUG] train episode 287: reward = 224.35, steps = 605\n",
      "03:24:28 [DEBUG] train episode 288: reward = -176.85, steps = 704\n",
      "03:28:04 [DEBUG] train episode 289: reward = 233.31, steps = 613\n",
      "03:33:56 [DEBUG] train episode 290: reward = 130.94, steps = 1000\n",
      "03:39:48 [DEBUG] train episode 291: reward = 231.11, steps = 996\n",
      "03:43:30 [DEBUG] train episode 292: reward = 258.40, steps = 624\n",
      "03:47:19 [DEBUG] train episode 293: reward = 207.03, steps = 640\n",
      "03:50:15 [DEBUG] train episode 294: reward = 301.69, steps = 487\n",
      "03:53:20 [DEBUG] train episode 295: reward = 243.74, steps = 522\n",
      "03:55:55 [DEBUG] train episode 296: reward = 242.55, steps = 428\n",
      "03:59:04 [DEBUG] train episode 297: reward = 233.71, steps = 522\n",
      "04:02:41 [DEBUG] train episode 298: reward = 252.60, steps = 598\n",
      "04:05:19 [DEBUG] train episode 299: reward = 266.74, steps = 438\n",
      "04:07:39 [DEBUG] train episode 300: reward = 242.77, steps = 387\n",
      "04:13:41 [DEBUG] train episode 301: reward = 103.89, steps = 1000\n",
      "04:15:19 [DEBUG] train episode 302: reward = 225.41, steps = 268\n",
      "04:18:22 [DEBUG] train episode 303: reward = 235.42, steps = 502\n",
      "04:20:19 [DEBUG] train episode 304: reward = 262.21, steps = 320\n",
      "04:23:44 [DEBUG] train episode 305: reward = 235.70, steps = 563\n",
      "04:26:32 [DEBUG] train episode 306: reward = 249.91, steps = 463\n",
      "04:30:55 [DEBUG] train episode 307: reward = 245.32, steps = 732\n",
      "04:33:22 [DEBUG] train episode 308: reward = 210.31, steps = 410\n",
      "04:36:40 [DEBUG] train episode 309: reward = 242.93, steps = 550\n",
      "04:38:09 [DEBUG] train episode 310: reward = 298.26, steps = 249\n",
      "04:39:35 [DEBUG] train episode 311: reward = 259.20, steps = 239\n",
      "04:42:14 [DEBUG] train episode 312: reward = 242.33, steps = 441\n",
      "04:44:57 [DEBUG] train episode 313: reward = 225.58, steps = 452\n",
      "04:46:42 [DEBUG] train episode 314: reward = 258.81, steps = 291\n",
      "04:51:45 [DEBUG] train episode 315: reward = 226.96, steps = 846\n",
      "04:55:46 [DEBUG] train episode 316: reward = 272.94, steps = 671\n",
      "04:59:41 [DEBUG] train episode 317: reward = 138.30, steps = 651\n",
      "05:01:33 [DEBUG] train episode 318: reward = 250.34, steps = 310\n",
      "05:03:13 [DEBUG] train episode 319: reward = 265.15, steps = 274\n",
      "05:08:03 [DEBUG] train episode 320: reward = 223.02, steps = 799\n",
      "05:13:24 [DEBUG] train episode 321: reward = 250.42, steps = 880\n",
      "05:15:06 [DEBUG] train episode 322: reward = 206.46, steps = 277\n",
      "05:17:36 [DEBUG] train episode 323: reward = 267.64, steps = 413\n",
      "05:20:13 [DEBUG] train episode 324: reward = 237.02, steps = 427\n",
      "05:21:44 [DEBUG] train episode 325: reward = 25.77, steps = 242\n",
      "05:23:45 [DEBUG] train episode 326: reward = 287.57, steps = 331\n",
      "05:28:12 [DEBUG] train episode 327: reward = 263.47, steps = 722\n",
      "05:29:42 [DEBUG] train episode 328: reward = 255.88, steps = 243\n",
      "05:31:26 [DEBUG] train episode 329: reward = 238.99, steps = 281\n",
      "05:33:30 [DEBUG] train episode 330: reward = 252.40, steps = 332\n",
      "05:39:42 [DEBUG] train episode 331: reward = 96.63, steps = 1000\n",
      "05:41:12 [DEBUG] train episode 332: reward = 238.03, steps = 239\n",
      "05:45:48 [DEBUG] train episode 333: reward = 250.10, steps = 737\n",
      "05:48:58 [DEBUG] train episode 334: reward = 222.97, steps = 509\n",
      "05:50:30 [DEBUG] train episode 335: reward = 35.42, steps = 246\n",
      "05:54:17 [DEBUG] train episode 336: reward = 224.70, steps = 607\n",
      "06:00:33 [DEBUG] train episode 337: reward = 126.39, steps = 1000\n",
      "06:03:18 [DEBUG] train episode 338: reward = 194.06, steps = 438\n",
      "06:05:04 [DEBUG] train episode 339: reward = 241.88, steps = 279\n",
      "06:08:11 [DEBUG] train episode 340: reward = 211.99, steps = 494\n",
      "06:10:02 [DEBUG] train episode 341: reward = 270.48, steps = 293\n",
      "06:16:20 [DEBUG] train episode 342: reward = 133.99, steps = 1000\n",
      "06:21:06 [DEBUG] train episode 343: reward = 224.46, steps = 767\n",
      "06:22:28 [DEBUG] train episode 344: reward = 234.86, steps = 215\n",
      "06:25:20 [DEBUG] train episode 345: reward = 153.00, steps = 455\n",
      "06:28:05 [DEBUG] train episode 346: reward = 250.07, steps = 433\n",
      "06:32:19 [DEBUG] train episode 347: reward = 192.83, steps = 647\n",
      "06:34:33 [DEBUG] train episode 348: reward = 233.80, steps = 382\n",
      "06:37:26 [DEBUG] train episode 349: reward = 226.76, steps = 517\n",
      "06:39:48 [DEBUG] train episode 350: reward = 234.68, steps = 418\n",
      "06:45:28 [DEBUG] train episode 351: reward = -345.04, steps = 1000\n",
      "06:48:24 [DEBUG] train episode 352: reward = 268.46, steps = 516\n",
      "06:52:14 [DEBUG] train episode 353: reward = 240.82, steps = 678\n",
      "06:54:24 [DEBUG] train episode 354: reward = 242.19, steps = 381\n",
      "07:00:01 [DEBUG] train episode 355: reward = 135.67, steps = 1000\n",
      "07:01:23 [DEBUG] train episode 356: reward = 223.32, steps = 244\n",
      "07:03:09 [DEBUG] train episode 357: reward = 289.18, steps = 321\n",
      "07:08:41 [DEBUG] train episode 358: reward = 163.65, steps = 1000\n",
      "07:14:16 [DEBUG] train episode 359: reward = 69.05, steps = 1000\n",
      "07:18:43 [DEBUG] train episode 360: reward = 210.11, steps = 803\n",
      "07:21:17 [DEBUG] train episode 361: reward = 224.41, steps = 460\n",
      "07:23:05 [DEBUG] train episode 362: reward = 283.25, steps = 319\n",
      "07:24:26 [DEBUG] train episode 363: reward = 270.16, steps = 238\n",
      "07:30:02 [DEBUG] train episode 364: reward = 159.61, steps = 1000\n",
      "07:32:27 [DEBUG] train episode 365: reward = 263.71, steps = 431\n",
      "07:33:43 [DEBUG] train episode 366: reward = 309.44, steps = 226\n",
      "07:36:01 [DEBUG] train episode 367: reward = 291.11, steps = 406\n",
      "07:38:46 [DEBUG] train episode 368: reward = 256.37, steps = 483\n",
      "07:41:53 [DEBUG] train episode 369: reward = 218.04, steps = 543\n",
      "07:43:46 [DEBUG] train episode 370: reward = 282.23, steps = 337\n",
      "07:43:47 [INFO] ==== test ====\n",
      "07:43:47 [DEBUG] test episode 0: reward = 279.49, steps = 209\n",
      "07:43:47 [DEBUG] test episode 1: reward = 10.49, steps = 228\n",
      "07:43:48 [DEBUG] test episode 2: reward = 284.63, steps = 218\n",
      "07:43:48 [DEBUG] test episode 3: reward = 241.75, steps = 253\n",
      "07:43:49 [DEBUG] test episode 4: reward = 2.62, steps = 228\n",
      "07:43:49 [DEBUG] test episode 5: reward = 286.19, steps = 208\n",
      "07:43:49 [DEBUG] test episode 6: reward = 303.96, steps = 220\n",
      "07:43:50 [DEBUG] test episode 7: reward = 256.15, steps = 244\n",
      "07:43:50 [DEBUG] test episode 8: reward = 243.53, steps = 285\n",
      "07:43:51 [DEBUG] test episode 9: reward = 252.03, steps = 273\n",
      "07:43:51 [DEBUG] test episode 10: reward = 303.65, steps = 221\n",
      "07:43:52 [DEBUG] test episode 11: reward = -209.82, steps = 417\n",
      "07:43:52 [DEBUG] test episode 12: reward = 270.10, steps = 205\n",
      "07:43:53 [DEBUG] test episode 13: reward = 220.75, steps = 261\n",
      "07:43:53 [DEBUG] test episode 14: reward = 279.98, steps = 243\n",
      "07:43:55 [DEBUG] test episode 15: reward = -145.12, steps = 990\n",
      "07:43:55 [DEBUG] test episode 16: reward = -18.38, steps = 223\n",
      "07:43:56 [DEBUG] test episode 17: reward = 259.68, steps = 226\n",
      "07:43:57 [DEBUG] test episode 18: reward = 254.15, steps = 319\n",
      "07:43:57 [DEBUG] test episode 19: reward = 239.07, steps = 246\n",
      "07:43:57 [DEBUG] test episode 20: reward = 304.38, steps = 239\n",
      "07:43:58 [DEBUG] test episode 21: reward = 239.00, steps = 229\n",
      "07:43:58 [DEBUG] test episode 22: reward = 253.46, steps = 261\n",
      "07:43:59 [DEBUG] test episode 23: reward = 252.11, steps = 218\n",
      "07:43:59 [DEBUG] test episode 24: reward = 17.99, steps = 192\n",
      "07:43:59 [DEBUG] test episode 25: reward = 2.82, steps = 203\n",
      "07:44:00 [DEBUG] test episode 26: reward = 273.49, steps = 203\n",
      "07:44:00 [DEBUG] test episode 27: reward = 298.55, steps = 246\n",
      "07:44:00 [DEBUG] test episode 28: reward = 303.20, steps = 193\n",
      "07:44:01 [DEBUG] test episode 29: reward = -71.84, steps = 525\n",
      "07:44:02 [DEBUG] test episode 30: reward = 233.59, steps = 240\n",
      "07:44:02 [DEBUG] test episode 31: reward = 222.26, steps = 299\n",
      "07:44:03 [DEBUG] test episode 32: reward = 235.82, steps = 405\n",
      "07:44:03 [DEBUG] test episode 33: reward = 262.32, steps = 217\n",
      "07:44:04 [DEBUG] test episode 34: reward = 234.88, steps = 230\n",
      "07:44:04 [DEBUG] test episode 35: reward = 277.83, steps = 228\n",
      "07:44:05 [DEBUG] test episode 36: reward = 233.24, steps = 238\n",
      "07:44:05 [DEBUG] test episode 37: reward = 250.93, steps = 219\n",
      "07:44:05 [DEBUG] test episode 38: reward = -2.23, steps = 229\n",
      "07:44:06 [DEBUG] test episode 39: reward = 258.36, steps = 396\n",
      "07:44:07 [DEBUG] test episode 40: reward = 249.10, steps = 211\n",
      "07:44:07 [DEBUG] test episode 41: reward = 252.49, steps = 286\n",
      "07:44:07 [DEBUG] test episode 42: reward = 270.75, steps = 208\n",
      "07:44:08 [DEBUG] test episode 43: reward = 277.22, steps = 265\n",
      "07:44:08 [DEBUG] test episode 44: reward = -13.13, steps = 180\n",
      "07:44:09 [DEBUG] test episode 45: reward = 237.96, steps = 271\n",
      "07:44:09 [DEBUG] test episode 46: reward = 235.87, steps = 206\n",
      "07:44:10 [DEBUG] test episode 47: reward = 240.49, steps = 296\n",
      "07:44:10 [DEBUG] test episode 48: reward = 296.09, steps = 207\n",
      "07:44:10 [DEBUG] test episode 49: reward = 259.75, steps = 216\n",
      "07:44:11 [DEBUG] test episode 50: reward = 232.70, steps = 389\n",
      "07:44:12 [DEBUG] test episode 51: reward = 241.77, steps = 284\n",
      "07:44:12 [DEBUG] test episode 52: reward = 286.27, steps = 209\n",
      "07:44:12 [DEBUG] test episode 53: reward = 279.07, steps = 253\n",
      "07:44:13 [DEBUG] test episode 54: reward = 243.97, steps = 226\n",
      "07:44:13 [DEBUG] test episode 55: reward = 228.24, steps = 233\n",
      "07:44:14 [DEBUG] test episode 56: reward = 285.50, steps = 232\n",
      "07:44:14 [DEBUG] test episode 57: reward = 254.46, steps = 316\n",
      "07:44:15 [DEBUG] test episode 58: reward = 261.65, steps = 261\n",
      "07:44:15 [DEBUG] test episode 59: reward = 266.15, steps = 229\n",
      "07:44:15 [DEBUG] test episode 60: reward = 255.58, steps = 232\n",
      "07:44:16 [DEBUG] test episode 61: reward = 262.40, steps = 230\n",
      "07:44:16 [DEBUG] test episode 62: reward = 226.36, steps = 235\n",
      "07:44:17 [DEBUG] test episode 63: reward = 278.25, steps = 205\n",
      "07:44:17 [DEBUG] test episode 64: reward = 257.12, steps = 217\n",
      "07:44:17 [DEBUG] test episode 65: reward = -20.50, steps = 227\n",
      "07:44:18 [DEBUG] test episode 66: reward = 314.60, steps = 220\n",
      "07:44:19 [DEBUG] test episode 67: reward = 191.99, steps = 404\n",
      "07:44:19 [DEBUG] test episode 68: reward = 257.35, steps = 217\n",
      "07:44:19 [DEBUG] test episode 69: reward = 268.04, steps = 210\n",
      "07:44:20 [DEBUG] test episode 70: reward = 231.78, steps = 247\n",
      "07:44:20 [DEBUG] test episode 71: reward = 236.76, steps = 213\n",
      "07:44:21 [DEBUG] test episode 72: reward = 292.87, steps = 219\n",
      "07:44:21 [DEBUG] test episode 73: reward = -27.42, steps = 264\n",
      "07:44:22 [DEBUG] test episode 74: reward = 267.78, steps = 250\n",
      "07:44:22 [DEBUG] test episode 75: reward = 251.07, steps = 230\n",
      "07:44:22 [DEBUG] test episode 76: reward = 241.02, steps = 234\n",
      "07:44:23 [DEBUG] test episode 77: reward = 274.73, steps = 241\n",
      "07:44:23 [DEBUG] test episode 78: reward = 266.51, steps = 210\n",
      "07:44:23 [DEBUG] test episode 79: reward = 264.22, steps = 232\n",
      "07:44:24 [DEBUG] test episode 80: reward = 274.58, steps = 248\n",
      "07:44:24 [DEBUG] test episode 81: reward = 16.07, steps = 201\n",
      "07:44:25 [DEBUG] test episode 82: reward = 243.79, steps = 284\n",
      "07:44:25 [DEBUG] test episode 83: reward = 293.47, steps = 250\n",
      "07:44:25 [DEBUG] test episode 84: reward = -11.63, steps = 206\n",
      "07:44:26 [DEBUG] test episode 85: reward = 301.89, steps = 207\n",
      "07:44:26 [DEBUG] test episode 86: reward = 259.84, steps = 236\n",
      "07:44:27 [DEBUG] test episode 87: reward = 261.76, steps = 234\n",
      "07:44:27 [DEBUG] test episode 88: reward = -6.52, steps = 209\n",
      "07:44:27 [DEBUG] test episode 89: reward = -11.69, steps = 244\n",
      "07:44:28 [DEBUG] test episode 90: reward = 239.15, steps = 213\n",
      "07:44:28 [DEBUG] test episode 91: reward = 309.71, steps = 195\n",
      "07:44:28 [DEBUG] test episode 92: reward = 286.70, steps = 233\n",
      "07:44:29 [DEBUG] test episode 93: reward = 244.88, steps = 226\n",
      "07:44:29 [DEBUG] test episode 94: reward = 272.46, steps = 266\n",
      "07:44:30 [DEBUG] test episode 95: reward = 252.75, steps = 304\n",
      "07:44:30 [DEBUG] test episode 96: reward = 241.59, steps = 264\n",
      "07:44:31 [DEBUG] test episode 97: reward = 227.30, steps = 297\n",
      "07:44:32 [DEBUG] test episode 98: reward = 165.97, steps = 413\n",
      "07:44:32 [DEBUG] test episode 99: reward = 293.29, steps = 237\n",
      "07:44:32 [INFO] average episode reward = 213.27 ± 111.71\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD4CAYAAAAEhuazAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAABOX0lEQVR4nO29eZgcV3n2fZ+q6m32XftmSbYseZMty4tswMGLbBYH/BJMXmIWOwRiloQkYCDsL4GEAIEABgMfYBubQLCxAe/GBmNsy5JlSdYua5dGoxnNPtNr1fn+qDrVp6qrunt6eqZrup/fdc01PdXbqZru+zznPs95DuOcgyAIgqgtlEo3gCAIgph+SPwJgiBqEBJ/giCIGoTEnyAIogYh8ScIgqhBtEo3oFg6Ojr44sWLK90MgiCIGcWmTZv6OOed7uMzRvwXL16MjRs3VroZBEEQMwrG2CGv42T7EARB1CAk/gRBEDUIiT9BEEQNQuJPEARRg5D4EwRB1CAk/gRBEDUIiT9BEEQNQuJPEAQRIDYfHsCWI4NT/j4zZpEXQRBELfCW7/4ZAHDwK2+Y0vehyJ8giMDz9cf3TEs0HCRSGWNKX5/EnyCIQGMYHN96ci+u/86zE3re++/ahC8/tHOKWjX17OkZmdLXJ/EnCCLQJDJ6Sc97ZPsJfP+P+8vcmqlFjva3Hx+a0vci8ScIItDEU6WJfym82juKlw4PTOl7jCTSuOH2P+PV3tGc+wbHU/btnd0U+RMEUcPE0xMXf93gJb3X67/2B7zVmnCdKp7a3YtNhwbwjcf32MceeeUEBsdT6JfE/+RIAkcHxvHiwX5k9PL7/yT+BEEEmkR64sI3msjYt9MlCGeihA6nWNKWtRNWTfkdTqTx/rs34cY7nkf/mCT+w0k88PJxvO17zyFTYmeWDxJ/giACTSlCPBRP27dPjWYF1TA4OC8spIdOjU/4PYslZXVGYc2U36Fxs627ToxgYMy8fcasRpwcSWI0mUFIZYho5ZdqEn+CIAJNKbbPcCIr/r0jSfv2aZ98CO+/e5Pv8zoaIgCAA325frxn21I67nr+UFEdikBM6grxl9sqMnzOmN2IkyMJjCTSaIhoYIwV/frFQuJPEESgkSN/IbJ3PncQz+7r832OHPn3jSYd9z26vQdp3cCuE8MAgLFkBtd98xm8fGQQC9piAID9fWNFte3LD+/Ep3/9Cp7e3ev7mE2HBrD4tt/Z6xSE+IeE7RPPWlTinM6Y3YhE2sCJoQQaolOzFpfEnyCIirC/dxTbjhZOZ5SzfZIZA/GUjs88sB3/94cv2Mcf39GD3+/qsf8ejudG/vLrfPxXW7H+v55B/1gKO7qHsaN7GJ//zXbEQioAYFeRmTbHBuIA8s8rbDzYDwB44OXjALK2jxD/ESny7x5KoCGiYV6L1Qn1jqExEiqqLROFyjsQBFER/uJrfwBgljE4OZxAV1M05zGPbT+B/3pir/33cDyNvSdzLZmvPbYbmsrwFytmAXBG/idHEnhyZw9++MwB+9h9Lx0DYHYMwlAZSWTsSdhHtp9A70gSnY2RvOcgLKmo1Wl4Ma/VFPKjA+Y8wljSjPQV642Hpcnp/rEUmmIauqz3PdQ/jgsWteZtQ6lQ5E8QxLQjL2bafHgAa//tSdy/+WjO49531ybs6B62/x5OpPHcq6fsv9O6Ac45DveP47A0SSt89Ja6EHaeGMFj23vw3P7s8wSnRpMYscR3JJGGbnDMbY4ilTFw74bDeN7jOTJJ6zxUxd+TrwubHcOxQXOUMGqJv7gGcuQfT+uIhlR0NZnirxscjRGyfQiCmMGkMoadr/6KtHpViOLD204UfI2heAYbD/Xbfx8diOPUWArjKR3DiQz+6vvP4cWD/RiKp6EqDOuWdmDLkUGcGE54vt4td27EbfdtBWBG/jrnWNrVgKWd9fj643tw4x3P40j/OD55/zY8uOV4zvPFfIQQ8oGxFG64/c84
0p/tiMRc8FHLIhJpqKLjkD1/AIhqKjobs6Mg8vwJgpjR3HLnRpz3hccxMJbCS4fMVbSMAU1R09PucQm04ZHbvuvEMHYcH8aK2Y0AzKwcOS1zw4F+vO17z+FIfxxNUQ3nLWjB0YG4Y/QgM57S0TOctG/rBoeqMJy7oMV+zMFTY7jnhcP48L2bc54vxF8I+W+2HsemQwO4/Q+vZs/DOg1hRY24In852wcAIiEFTVHNTu9sJPEnCGIm88c9vRhNZvDNJ/eie8gU+pZYyJ4sdUfnva4sHYUBn7r/FQwnMrju7DkAzHx8OcoWPLuvD82xEM5b2GK+1kgy5zFejCUz0BSGVXOb7WOPbjdHJO5c+yv+82m82mtmBYlzEHMGsq0lp4G++8cbbM9fTPyOJNJoqw/bj4lqKhhjtvXTMEUTviT+BEFMC2IS8/EdPRi0FjalMoYtlCICF7hF/fGPvta+vXZJGwAzWj/sIf6nxlLoaopi1dymvH68myMDcSiMOSZZn9plpnGGVMUW8kRaxwEpHTSVMXCgb8yeP0hlDKR1A4dPjTsmdJ/d12d7/kmrYN1wPOMQ/0jIlOVZlvUzVZE/ZfsQBDEtjCUzqA+rODYYx4aD5kRqWud2BAyYUfJoMoOzP/cYrjxzluP5SzsbMLc5iuNDCZw9r9l6voGXjwxiYVsd2hvCeM3yTnzzSTM7aE5zFHVhDafPasROH9vHTSpjQFMZzlvQgv+44Rx87Fdb7VHDaDKDwfE0WuvDduclGE/ruOI/n7b/fnDLcXuOQPQ9a5e0YfeJEdvztyd8k2k0x0KIaAqSGcPOHBKRP9k+BEHMWAyDYyyl4+pVswEAR/rNyc+Ubth+OQAMjKft+57Y2ZPzOg9+6DLc87cXoT6iQVMYugcTeGZvH95wzhzc//fr8I9XnW5nx8xuNiPn8yT/vhgUazXtfCtFU+6cxChjQCrABgB/yLPIS3j+sZCKRFq3PX95wrcxqtmib4u/Ffk3ULYPQRAzjd9t7cbQeBpjKVPwlnU15DxGXnz17w/vwnef3uf7eh0NEVy6tAOAWR7h8Z090A2ON50z136MiJjnWOsG1i5pBWPAhYuLy5fXrFA9JHn8YtHVN57Yg8W3/Q4nhpzzE14dlaC1zvTsYyEVyYxhp3YK8R+Mp9AYDdlzCuK3WGNA4k8QxIxi94kR3HrPS/jXB16xfe62+jCaXDaGuA8Antnbi40Hi6unH1IVuwqmEGcgG/HPbjaPXX/uPDz04cvxy/dfipsuWWQ/7jNvXGnPQ8goQvzVXPEXZRx2F7HL1pVnzkJHQ8QuLx2z8v1FldJUxsDAWApH+uM4Y1aD7fVHrd+ibY1RmvAlCGIGISySkUTaznCpj2h2RCsKm8ninzY4dCk7pjnmL3xhKTIXwglkJ0rnWJ2AojCcOafJfI4k6Dddsgj3/O3FOa9rR/5qdqK4LuJcwVtM9lBnYxgKy9o+7lXAqYyBFw6YaxYuPq0dUc2yfazf58xvQUdDGEs66gu+VylMWvwZYwsYY08xxnYyxrYzxj5iHW9jjD3OGNtr/W6VnvMJxtg+xthuxtg1k20DQRDBQ6RuzmqM2lkwjRENTZagd1oVNMck8c/ohmMjlmjIX6JkIZdvizIRQvwdz7E6DE1h0FQF7VKWjUBkB8mvWR92jlaKEf/2+ggYAzKGGemLlb6ClG7g+f2nEA0pOGd+ixT5m487Y3YjNv7rVfZIptyUI/LPAPgnzvmZAC4GcCtjbCWA2wA8yTlfDuBJ629Y990IYBWA9QC+yxjzL4xBEMSMIJ7SHQuzTgyZE7dtDWGMJU1fvyGq2aLaYY0AHJG/zh1F0vLVzBFCHlKZbdUAwFUrZ+GG8+fb5Zm9niN8da+Rheph+8Rcwn1yJIFYSMWP332hY4Qg09EQhsIYLO23i8YJkhkdh06NYWlnA8KagogV8U9F7X4vJj2TwDnvBtBt3R5hjO0EMA/A9QBeZz3spwCeBvBx6/jPOedJAAcY
Y/sArAXw3GTbQhBEZUjrBs78zCN496WL8bk3rwKQzegxDI7RpDnJWR/WbAEWkb971y1Fql0f1VTc+d61niUORCciR+gAcMGiVt9iaELQRaeiKAy3XbsCTdEQPnn/NgCAar2/Jts+LvHvHkqgtS6EK1Z0oT6i5aR+AkB7QwQMsG0suQNpjGjmGgc9m9opRD9fh1dOytrFMMYWA1gN4AUAs6yOQXQQXdbD5gE4Ij3tqHXM6/XexxjbyBjb2Nvrn0pFEERlyeimwP3shUP2MeH5mxkulu0jlS2ot3x0kQkEABmD59g+rzm9E+cvzBXzkGaKc2QCYunOqAGA9792KdZImUCqktupRDTFsViseyiBljrTMgqp3jLaUhcCY8w+H1nUm+tC5kKwDLdHDtlUz+mJ/Mv2LoyxBgC/AvAPnPN8Kyq8xkie2+Bwzu/gnK/hnK/p7OwsRzMJgpgCRHQr9pr92mO78bLYvEQ3XBO+poctJjZHk9lUT93gtkcOFLB9fCL/fNi2j+t1NUnYxcvJoh5SFcdjUhkDrfWhvO8f0VQo0l2y7dNSF0IyYyCpGwi77J6JdGaToSwJpIyxEEzh/xnn/D7rcA9jbA7nvJsxNgfASev4UQALpKfPB5BbLo8giBmDiG5Fos4LB/rtlanJtGH7+vURFZ+8bgXmt8Zw5pxG/M/GI44JXyCbHQPkF38hzpEJRMpCqN2+uiaptIj85Tz/sKYgpCqOBWmiIJ14raaohuFEBmFNwcWnteOc+c1gUqwbC2dfrzkmIn8DYSvy9xqVTCXlyPZhAH4EYCfn/OvSXQ8CeJd1+10AHpCO38gYizDGlgBYDmDDZNtBEETl0F0VOBNpHWsWtaKjIYKUbmA0qduTmo3REG69Ylk28k9kvF4SQIFsH20Skb9LYFXVK/LPHgupimMOADBHMTLt1hzG6gUtuPO9axENqZDLCjki/1gYGYMjkdHtNrlX+E415Yj81wH4GwDbGGMvW8c+CeArAH7BGLsZwGEAbwMAzvl2xtgvAOyAmSl0K+d84js0EwQRGNziH0/piLWqCKsKUhkdo8l0zkpVEVm7I3+ZfEKYtUmKF397tKA5XzfksH2syF8aDYRVxTE6ALIrb4XV1VYfxoG+MUeb5Y3X3Z4/YHZ8IddoZMaIP+f8T/D28QHg9T7P+RKAL032vQmCqBwbD/bj/3zvOTzzsStyJj0TGR1RTUVYU5DKGBhP6jkZMyJiH03lEX+tsO1Tmufvivxl8bcEW1EYNIUhY3CENafnD2TFX5fEH3BG+Mwn8hcppmPJ7NaRwuuPzhTbh5hefr35mL3zEUFUkl9uNLdd/NO+PsckLQDEUwaiYVP8kxnD7AxcEa0Qb+6Z7mFSjO3jjuLz4fccOaqX7R1x28v2EemnhnUCHQ3hnDbLz5BTPVuE+Kd0ewQ0o1M9iakloxv4h/95GX/1PVoSQVQekWqZ0Q24tB+JtI5YSEXEivwTaSNHyMNa4Tr7RWX7TCBSjvhMEsvCLq8zCEnv4R7d1HvYPoBT5OXX8or85fOw8/1nWqonMfUIW9VvP1KCmE5EtJzSeW7kn9YRDSmm7aMbSKT1HAsnrGb/9ovw86U9uiPmYvB7jmz7yPaOEOaQynJsH1E62rDFP2K9trf4yx2Z3EG4J6HzWV3lhMR/BiGGlzzfOJkgpgmRDZPWDfuzKf7WDY5YSEz4muLvLpEQ0ryjYpliavtMJPIP+0z4ysIul4qQJ2M1n8hfrHEQdYKiPp6/fFx+f3Ed66z6Qe4iclMF7eQ1gxDfL5J+IggIMczohm19AGbUD5hiJzx/hTG01Xt7/oAp/gPILZHg1ykAk/X8i4v8RQcVUpWcGj7FTfh6v64j8rdGQG86dw5a60L2Ji5TDYn/DCIb+Ve4IQSBrHindWdJhoS1OUssrCKiqUhlDDCWG8XLEXs07Bf5l9fzd+fUC2SR9or8i8n26WqKYOWcJqyc25R9Xcd7ZG/LGT2ig2mMhnCttTH9
dEDiP4PQSfWJACFy4zOGswyzHflr2cifc54juHKKpp/PXVy2z+RX+MqozMvzz7V9RLaPOPeGiIaHPnK54zHy0gC5g3FG/pVx30n8ZxDcKPwYgpguNCnyl20fsVOVGfmbi7wMruQIeb6SyYJi8vwnJP5FLAyTI3yRBWRm+7hX+JptEyNyL4tKnvD1y/mfyMilnJD4zyAMivyJAOGY8PWI/GOS558xeI6QqwqDqjB7ctiLYur5T0T868IqVIXZdXm88LR9VMVe+StojJivITo+rw5M7i78Mn8o8icKQuJPBAlhY2Rckb/YkD0SUszIXzfAdG8hD6mm+PuneubL9slG5cXSGA3hF393sb2toxeaj+cfcnn+os0XLGzFxkMDnqMU2ephAM5b0IIFbXWOa+FXEnqqIfGfQVSL5987ksR4KoNF7VOzNykxPYhoP2O4I3+zXINI9eTczFDzEviQoiABwzfCX9Ba5/v+pWT7AMAFi9ry3q945vnnrvAVwv6jd1+Iw6fGHc/LPkZ6Xcbw61vXAXDuXlYp24fy/GcQVaL9+Oqju/CBu1+qdDOISSKifbfnPxy3xN8q7yDwEnie576HPnw5FrQVFv9yi6cz8s+OLtwTvoLmWAhnz2/2vM/P83dk+1Qo8ifxn0FUi+0zkshgOJGb003MLMTnMe3adH0obv5vRbaPwGu1roiAz/EQT6WAOpUy4VsMXuUdQiqzbZ8bzp+PvV+6tqjX8kv1lDuS6arf74bEfwZhVIf2m5GiXiUnU8Pohrf4i/1sRZ6/IF+1yivO6Mo5JouwF6Xk+ReDc5GX1cGoqi3YXnV+/HBE/j7Fj8nzJwpiVIn664aRUwuGmHn42T6D8RSA7ApfgVc2zG8/dBla68OesuhhoTsIlej5F0Je7Sui/ZDGbAvInfKZD6fn7/0YSvUkClIttk/G4EhT5D/jEcFIMuOs7SNsn5hL/L2yYc6aZ9o9PR7FCgtF/pEpivxVj2yfkLSZi3tTl3zIp8B8zmcinUk5IdtnBlElgb+ZGqhT5D/TEdlnibTumvBNQ2GmqDlW8ebJ2fcS+kLi32rV0mmr98/ZLwXVZfswZlpBWgmRv3wOFPkTJVMtkb9ucKSrpSerYezIP607LMm+0RTqwxoYY47JzHylGryEsZD4nzmnCQ99+HKcOadxgi3Pj+pK9QypChjLlnR2p3zmo5jInxZ5EQWpllLOGcOgyL8KyEi2jxz57+8dxdyWGIDsXrVA/shfLZAj74dcRK1cyG05e14zLj6tHUA2Q8e90jcfogPLdy6U508UpFr0MmNwGLx6JrBnIi/sP4UfPrN/Uq8hMnwSaR26NIE/nMjY+flnzs6Kc97I30P8vTqE6UB+3xsumI8737sWgDT5O4F2iWg/3zMo24coSLXYPiLNM20YiCjTs3EF4eTtdzwPALjl8tNKfg3xeUxmjJzAZEGbGfnLGT75snJK8fynCtXnfUXk77fYywvxSvnOhSJ/oiDVIv4iYqRc/8qTnsRwMuMT+QPAQo+Vufnq9HgJboUCf98RR2kTvubvfP0YrfAlClIl2o+0JRQk/pVHpGWWgmGLv3ORF+CsyfP1vzoXTVENLbGw72t52eh+E6RTjZ/4h+xUzxJsnzznQit8iYK4v2AzFXtlKC30qjiTEX/dnvDNpnpeYk2Ozm7ObkX41vPnY+vnrslrb3jbPiU3bVL4ibuI/Cdi+9iRf57HkOdPFKRabB8R8VPkXxnkrDFRiqEUhPgbPLtI6xtvPw9P7T6JVRPMwvGyfSo14es1+QxInv+E2mU+1qtzu3BxK148OFCx8yTxn0FUSeBvl3aYjN9MlM6YVW8fAIasUgylIJcY33x4EADQUhfCO9YunPBreZdDrpAo+to+k4j8PV7yx+9ZixNDuSubpwuyfWYQ1ZLnb0/4VktvNsM4NZq0b0/W9pndFIXCgJcODwCYXLTufm6lbB+/zBxNqvA50dfyes2GiIZlXQ0ltLA8kPjPIKrF
88/Y2T4U+VeCU2PZaH8yto/BORqjGk6f1WiPSv3SJIvBLfaVSvX0W8Frr/AtobZPhfqxvJD4zyCqRPuzef7k+VeEU6PlEf+MzqEqDEut6JUxf7+8GNxiX7FFXr6R/8TLOxSzwrdSkPjPIKrF9hGefy2Wdd50aKDiIx7Z9jk5ksS7f7wBX35454TnYAzOoTBmb74+sYnQXNxiP92CKd7PN8+/hFRP2J5/8NSfxH8GUS2Rf3YTkCo5oSLZd3IEN9z+Z/zbQ7sq2o7uoQQYA2Y1RbCzexhP7+7F9/+wH68cG5rQ6+iGGfmLsg2TjdTdkf902z7i3Xzz/EtK9RSe/6SaNiWQ+M8gqmEDd86ztfwrHQFPNyMJc8vCZ/f1VbQdxwbjmNUYRUdDBN1Dcfv4cCKT51m5ZIT4W2UbJuP3A5X3/EV07r/C15rwnYCSKxT5E+WgGvL85dFLrWX7xK0Uy/7x0tMry8HxwTjmtkTRFA2hdyRrAQ1PMPPH4CLyt8S/zLbPdEfLhSL/hW11aIxomN/qv6m832tS5E9Mimrw/GWfv9by/EeszcoHxooX/7ueP4QHXj6GbUeHsO4rv5/Qc2Vu/dlLeO9PXgQgxD+G+ojq6IxHJhj56waHKtXsn+yn07HfLZv+aFm8nd+IY0lHPbZ9/hosbC9e/LOvFTz1p0VeM4hqmB+VV/XW2grfUUtc/UY8D2/rxpO7TuKmSxbhnPktAIDv/H4fFrbXIaIpODYYx+YjA/iLFbMm9L6cc/xuWzcAsx7P8cEErjlrds4Co5HExCJ/3eDQFMWO/CfbmcuZQpVI87zijC48tqNn0hPXDuwOpXwvWS5I/GcQ1eD5y8JXa9k+srgOJ9Joijq3H/zxnw9iw4F+DMfTuOOmNRhOpHFiOIG6iGr76oWi4eODcTy9uxd/fZG5ynbToX7ccPtz9v19Y0mkdAPzWmI5kX4pkX9Ey074TrYzl+cMKiGW33rHapwcTk5oQrcQlOpJlIVqsH3khWq1lu0zmsyK69H+eM79Yk5g85FBcM6x7+QoAKBvJIlTY6Y3PzSextajgzjSP+75HvdvPoZP3r8NQ1b+/v9uOuq4/9iA+b7zWmKoDzvr6w9PNPLnZrQesSL/yc7hyIJficg/GlInZOkUQzH1/CtFxcSfMbaeMbabMbaPMXZbpdpRSQyDY3/vaPGPd323vv37vfjPR3eXuVVTi5zhUyuR/8PbuvHNJ/Y6IuuB8ZRVBz/7T42nTfHvHUni2GAce3tGAJhZOD3DlvjH03jzt5/F5f/xFDjnGHRNHov3EEK+68SI4/7DVqdhev7ZgX9bfXjCkb9hcGgKK1tJ4krbPlOBHflXuB1eVET8GWMqgO8AuBbASgDvYIytrERbKsnvd53ElV//A44OeEdxbtzZPv/52B58+6l9U9G0KUOODv/xf7bgOzOs/cXwbw/txG2/2goAONI/jg/87CV844k99oQvAJwYSmDFpx/BJ+7bij09I+CcI57SsWK2uRn55sOD2NuTGxjItXh+8Mx+nPeFx3F8MDuKGE2m7cfFUzpeOTaE9792KT7zRvPrtdvqDOa2xNAgiX9XY2TCnn/GMBd55dubdyKoSmVtn6lAVIKgVM8sawHs45zv55ynAPwcwPUVakvFODIwDoNnh+KFyFfbZ8uRQXzwnpemPYOGc46Vn3kEP3n2QFGPd5/DV2fYyKUYNhzox4aD/QBgT7QCpiDXWVbL1x/fAwD4xcajuPobf8Tdzx/CeCqDc+Y3AwAO9o1h78lc8T85kp2kfWLnSfuxAjGpPBxPY+/JEaR1jvMWtKAxagr97hMjaIxoaI6FUBc2j0U0Bc2x0ITz/A2DQ1Xyb8w+EZyef/DEsjTI83czD8AR6e+j1jEHjLH3McY2MsY29vb2TlvjposBy5ftLzJ9L5/l/84fvYDfbu3Gnp6RnPuGxtO2n1xuRpMZjKd0fO43O4p6vFfnVK65DM55IDaF7x9L2TVz5FFd92Dc3uHq
2KCzw396dy/GUzpa6sJorw/j+FACe3tGsFjyoEMqw8G+7OuJjkTYRXt6RuzP1HAijaNWULGwrc4W/10nRjC3xdxftz6iWr81NEZDE5/wFXn+ZbJ9ZIEMoliWglIgfbSSVEr8va5EzreWc34H53wN53xNZ2fnNDRrehE526eKFH/Z9kmks2Ke0Q37i7urO1f8z/3CY3jDfz/j+7qPbj+Bbz25t6g2uBkYM8Wm2E2ovUYvR4sc+XDO8Z2n9uHJnT0YTqTxixePODqOh7adwJovPeG4NjK/2XLcsahpqjDFPwXD4I5R3bHBONrqw7Zoy/SNpZDMGIiFVMxpiWLfyREcH0rgkqUd9mOWdjbggBTli2s5ksjgQN8Yrv7GH/GHPWaQNBRP2x3PvNYYGq3MomPWAi8Atu0TC6loimkTXuRllndQpsT2qVRRt3JjV/UM4OlUSvyPAlgg/T0fwPEKtaViiJWexUb+sm7KHcbLRwbt27tODHs+d3/vmOPvB14+hrM++yj+tLcPf3fXJtuG8EIuAeBGnEOxk35eGSHbj3u32c1dzx/CVx/djc88sB2fuv8VfOxXW+3nbjkyiK1HB9E/lvK8nv1jKXzo3s245acv5txX7GghmdELLrJKZnSMJjMwuLmo69hg3P7i9wwn0RDV0FqXu5etyN6pC6uY3RTDiwfN+viXLG23H9McCzlGDKLDPzWWykkcGI5ncHQgjsaoafHI/v681pj1Xpr9nk3RkK/nn8zonqU4zEVe+TdmnwhKFdo++er5V5pKif+LAJYzxpYwxsIAbgTwYIXaAgC44j+fLtq3LhciU+Mnfz6Iu547WPDxskj1S2V5xS5Kc5ujOdkdfnbPJ+7bhtFkBluPDeZ9z21Hh3DJl3+Pe1447Hm/EMOIpjoi7leODXm+t1cueO+ofzR+7TefwV3PHwIAPL6jB4A5yhCZML0jSRwfjOP67zyL7/9xPwDvDUpEFPzK8WHHaOFPe/tw2icfws5uZwdkGDxnBPHZB7Zj9Rcfz5sS2e+olZ/CsYE4Vs7JbmnYGNXsUdKCtljO8+rCqh2ZA8DZ85rxpnPn4vb/ez6aY851AeLzc2o0aWfxCIYTaRwbiNulCITtA8C2fUSHUBdW0RjVMJzI4KuP5hade9v3nsM3PUaGusGhSLV9JotzhW/wxLIUgru+t0LizznPAPgggEcB7ATwC8759kq0xWoPDvSNFe1bl4v+sazn/+kHCp++bPv0jWUFc/ORAcxvjeHipe05GSJeNkdGNzBuCfOo5PN6+fEHT5kjhm//PvvlP9I/juu/8yx6R5K2aPWNJrHi04/g1d5RDI6n8Mb//hP+6Zcv5763R3rnKR/xT+sGdnYPY8dxs9qk2Cf22EDcbv/RgfGcmvRe9oWwlnSDY8knHrI7rV+9ZObBy6MnAPinX27Bik8/4jj2R8tS+aHVyXgh18o/eGocYyndKf4RDamMeQ3Ontec8/xYWLM3P2+IaFjYVof/fsdqXHv2nBzxP2QJfv9YKmdk9+TOk3hy10nMt6L8Bkn8l3eZGUXC868La7jY2nj97udzO/kj/eOeSQm6lepJ2T7+MFrklQvn/CHO+emc86Wc8y9Vqh0AkEhXJt/cbSEkM85Ic/PhAYd4O2wfSWQ2HBjAuQta0FYXzol6e0dz9wiVHyPEHQDGrFTEp3afxIfu3QzOuS24x4cS9vNeOjxg2ywDrjzzF/b329dzwwGz/TskW8fL8+/zEX8h4qKD6RlOQmFASjfsSPfoQDznnL2yVtzptMIuEu3fcKAf91kdAecc928+BsD5P2lviAAAHn7lBNK6gd9uPZ5jGcmR/3ar0zpTEv+upqg9ojjLEv+YJJ51YRX1lh2zZnGrQxBb6pziL2KBvtEU9vc5O/0d1kims9Fss7yaWKST1kuR/7plHbjlsiWO893ZPYy7nj+EVMZAysv24c6SzpNFqULPP7vCN3jnQyt8AYynJpblUA445znCeURa9ZnRDbzl
u3/GhV96wk7vkyN/uVPoG03i7HnNaIyGEE/rjghefpywO+T3lf124SG/58cv4jdbjuNIf9xR/0WIsdwhuP313pGk/f6cc/z1D57Hdd96xreG/6ymiKMjkxGiPjCeRiKtYyiexppFbY7HHBkYz7mOw/E0huJp3HrPS3jklW5856l9OZPKIitKdMD3bz6Gf3toJwBgm1TXfiyZFUNx3q/2juKnfz6ID96zGfe+aEbKhsFx/+ajjuslru3KuVnxX9pZb4u/iPxXzGm074+FVaxbZkbh/3jl6Y42r5idfR1ZG/vHkp5rAjobI7j5siUAnHMyYjQgxD9mTUC31IWQSBt2B3DtN5/Bp3/9CpIZw3NUaFh5/pGy2T7y7eCJZSlki8VVth1ekPgjmyo33e+ZzDi/UIekKFzO6nj0lRMAXLaPK1puqw/bvq5s5cjiL6wSYTeZ75mNiC//j6fw/36btb5+sfEIthwdtP9O6Qae2nUSGw6Yk5EnhuI54n+4f9w+L51zO1f9VWtC0h35L2qv9438hfgPjqdw0lrhumZxq31/fVjF0YF4jvgPxdP4+mO78but3Xj/3S/hq4/uxq7uEUeWze4TI7bdJ+gbTSGZ0bHlqCz+VjE23UDfaBKr5jbB4NnO45k9Zm3++zYfwz/+zxaHN77d6kTkTbqXdjZgYXs9AOCsuc2IaArOW9Bi3x8LqVjW1YiDX3kDzpWOA8Abzplj35YtoJcOD+Kkh733vXdegKWdYovFXD+9LiRsH9Xxmu6RVMbgSGUMbDjQj+u/86zdOYjIv1wrfFVHG8vykhXHrucfQNe/psR/T88I7nzuYE5e+VTlwOfDKyPloCTEO6QJyONWNClbDO4vaENEs8V/xEf8V332Uew+MWK/d6vLRgCAH/4pO+n97af22VkngCmE7/nJi3hipznx2j2YG/nv6x3NioPOcVqHKXR/e+dGbD48kOP5dzUWF/n3WKOf8xea4n/FGZ24fvU8HOnP9fzv+ON+3PviEcexDQf7sW5ZBw5+5Q245LR27O4Zwc7ukRyL6MRQAkelyVNRj6dvNAWDA1etNCtq7rRSah/ZfgIf/Z+X8edXzU7gxHDC/sIfPDWOaEhBe302u2dhex1+8p4L8b13no/W+jB+9+HL8JHXL7fv90oDFURDKn5w0xp87W3nemYMrV7Y4vh7lTTiAMyRxk2XLLL/VhSGpqhmW0LN1msOx9N2bSBBWuf45P3bsOXIoB0w6NYevpPZt1emGss7BNnzr6mqnt96ci9+u7UbnAPvunSxfXy8AuIvBOu/37Eaa5e04cqv/8ER+e/oHkZYVdBWH7atBGftdeeXsy6s2l8YkY3COXd0KABwzX/90b69sL0eA+ODBdsaUhnSOscW16To8aF4TiS//+So7fnrnKOzMYL9fWM4dGoc//7ILtxy2WmOx3c0RApG/gNjKfsaLGirw5bPXo2mqIYf/ekABsbTOZOdJ4YTWDmnCbtODDuumYiCV81twp3PH8IXf7sDTVENdWENJ4SVNZhwWES/fvkYOIfteZv2muZYTHff5mM4rdPs5HSDY3ZTFOOpDIYTGcxtiTmi7oimYlaTivVnmVH8sq6s5QPkF38g2/nc/YKZAXX58g48s9fseJZ3NWDz4UGsXdKGc+Y150zE/uZDl+W83g9uWoNF1khERP6D42l0u8o9p3QDIavaZUoa2U129y4Z+bWqxfPP5vkH73xqKvLvsCbs7rMm8wSVEH8RMc9ujmJWk/kjR+m7ukewrKsBC9pidp69bPvIFSIBM/JvckX+/7vpKB7cctxXUBa2mWmAXp/Lu2++CH/3WlOohU8vjwIAM0o+4qpOOZLM2J62wZ3pknVhLSfPv70+jOFEJmey+19+uQUf+fnLAEzbQdhGs5oiaI6FwBjDUstOedEqpSBz9apZ9ihBcMZs8/HXnTMHqYyB5/afwgdet8yxQO3EcBxHB8bRaPnh3//Dflz3rWdsv39WUxSxkOqw7GY3
RR0d0NyWqG31zLPSKk/rrM+JxL2IhYuLx1osoV7UXoe7b74I99xykX0e1501G//6xuJKZV10WrudXSTbPu5a/6mMYb++6AjFHr7lQpHUKIBaWRK0h29AEOKZdHn88fT0T/gKn1oM39vqwg4L5dhgHIva6zC7OWZ/Ebkj8ne2WSzRN+8zI+btx4fRENFw181rPdswy8oEWS550oAZda1b1o6PXbMC77pkkV0UzC2yB0+N48RwwlGCAMhODBuG2Um94ew5eO3pnegdSeaMFDqsNpwaTeET923DAy+bHfMvXaWIX+0dg6Ywh9ct2u3OcQfMXZeEqAlOn2VG2asXtOC0jnrMaorg3Zcudsz5vHpyDEcG4o5JWAB2eeW5LTHHoqY3nTsXCVfHNbclhtee3gUgG8n//p9eh996RN5u6opMmxSfm7qwhsuWd+DSZR1496WL0VYfxtWrZhf1Gm5aJPF3d9Jp3UDY2sA8nZki8a/CRV52nn8AT6emxD+bceL0neXI3x2BThUiy6TN8oNb60MO7/rkcAJdjRHMaY6ieygBzrljM5eRRMaRYid7/rtOjGA8lUHfaBKdjRF0NjhFUCA6i8XWsF/QFNXAGIOqMHz++rNwnuUlu60AwbplHY6/hV2TMcz1BPURFV2NEZwcSeR4/qLWzUuHB3DvhsN2tO/mYN+YHfEL5jbH7DTJeS0xh7e+tLMBc1ziL09+/uBda/CzWy5GLKzik9etsEX620/tQ/9YCstnOcX/t1u7ce78ZrTVhx2LmurDKpKuVOF5LTFctty8JvJHrZihf6yA7SMQ/2t5VLesqxEvffoqexHXRJEjf/eK3rSejfwTsu1jif+bzp2Lf7nmjJLeV1CNef60wjcgCPF35yzL4n98MIGXDjvtjamgfzwNxrJfuNa6sF0qIZHWMZzIoLMxgtlNUSQzBgbG0w7bZySRRps06Vcvif/XH9+Dv//ZS+gbTaKjIZyzOCj7HFM4RPQtcD8+LO1sdOnSdlx71mz8+tZ19rELFzvTL4X4G9yM/OvCGrqaIugbTdl+MWBaPku7zI7nbmsVb2PE2/Y42DeGZtcEtaIw215ZNbcJmz59lX3f4o56zGl2iqDsgS/tbLCf+5bV87HjC+sdi67One9cgLWzexjXnT3H8ToKM1MovSL/1Qta8PH1K/C5N0+sUnmxmTNiA5VQGXedapI8f3dKbiqT9fzjqQwMg4PzrKj99ztW49Yrlk3q/asy8rezfYJHTU342pF/xj/b563ffdacRPy368qWxeDF4HgKzbGQHe201ocxOJ4C59z2/rsao7agP7GzB0/vylY2HU5kcPqsBjsTqD6iOtLJnt7di+VdDVja2YCmmIa3XTAf248PY0f3MN5wzhy88ew52G+lOTa4BLfJJf6yIDVFQ7j9nRcAAP79hrPxau+YY1MQwJmJJCL/zoYIdIPbpRx+9+HLsLi93lrUpOL5/aaltMCah2iOhRyvM5LMYFnMaU8BwEevOh1/d9cmrLb8/cXtdTh4ahwNEQ1nzmlCSGX43/dfao+w8nH3zRchpJmVM5d1NeDjv9rmuP/68+Y5rkdEU6GpSk611a7GCBSF4QOvW1rwPQVnz2vGtmNDRU8Mig65nCW8VYWhMaphKJ62AwNBWue2+I8ldXsUWs79bqtT/IO7yKsmxT9f5C9K4hqcQynQX/cMJ6ApzF75ORH6x1KOdL22ujDSOseR/jhu/8OrAIDOpogtzB/7362O56cyhuP57oU2HQ0RnBpLYe2SMBhj+OrbzsV3n96HHd3DeO+6JbhgUau9C1jM5TPnRP6S+MtW09svNPeJfViqWQ8Ag5Jo6wa3In/TgukeTNjtE53GvNYY9liLlMREtpeotXiMYK5Y0YU9X7rW/vvXt66z50MuWdqOzZ+5Oqdz80OMLFZ6TMx+bP0Z9hyCiPwjIcVT/ORSCsVy7/suLlg0TkZ0QKlM+cQfMP/3w/G0vTJYkNINu8MZl3YgK2eAJA9i
lCrxJMjzDwgiWnF/YbwWeRVT6PEjP9+Mz0v1gDYdGsBTu08W1ZaB8ZQjz14s3f/nX27BvRvMVaOdDRGHj+0m332tdSEMjKfsDCfAtGeWdNRj+Swzgn7XpYtxxRmdjtxvADkbizvFP9eTFraRsEq81iB0WY8Ru07JoinEesXsRgyMmwutRIf8969bagudn30l01IXtkcP4r0ny58+fgX+/nVZS0O0J6wqDp/64+tX4KNXnY5Ll3bkvEYhGiKao92FCE+R+DdENIylMjkF+EzbxzzXeCpjW5A04Zsf8vwDQsYn8o97lHfgudsLIJ7SsfIzj+DR7eaK28HxtEPobrj9z3jPj3NLBnsxMJZ2WBHido+0U1NXUwTt9f6jitY84t8znADnQEdD9jEXLm7DU//8OlvcOxsj+PF71qK9IYJ3XrwwWwcm5hRM2fP38qQvXNyGn7/vYvzLNSsA5BZWqwurdtQsVtRqUmj31f9zLm66ZBGuWTUbI4mMvejri395Fj62fgW6msx2tXgsbJoO3P8Dv8i/qzGCD79++bTkqL/uDDOb6JqzSsvs8UNVGHSD54y80lKe/3gqG/mXM89fHkUE0SYpBSXAnn9Nib9YIZvKGI5Vvl55/l6bS/WOJDGe0vEFK9pP60berRXzMTCecoiZEHK5M2mvj6Appvn6qvkif7FytVhL6v/95dn43JtWAcj1/EMFIn8AuPi0dtRZPrE78q+PaJjXEsP81pg9z6Cq2XO6bHkHvnD9WfZISKRuir+F+LrbNV24M3BEqqfw/AWaOn1f8TNmmyUg3JPtk0VTGDIGR9rIFX/xUY/L4l9O20de5BVEtSyBbG2f4J1QTYm/nLss346ndEd0C+Rulg5kh9oiRz9jcM8SxcVgev5ZMRP+vZzuqSoMjDHfyUqvyH/zp6/COy9eaP/dMYH5CBHx59g+BSJ/gUiB9BJ/xhiuPHOWfcyrQxOdodiTtiVm/i3mJLw8/0og5lfcto9WBUa1pirQDZ5j+6T17GhgfKrEv4rLOwQx9J/5n9YJINfGkb3S8ZSO1nqnsHgF9KJDECOFjG5+STYfHsAZ//qw/biTIwm8/fvP+e6AxTlHMmPYOykBcAj81StnYdcX19t/+0XvXn52a30Yi9qyefvuXPd8iLz/fKmekTyLkMRksLvWTr0VOV9/3lzUh1W8Y+1CzxGEmHA9YJW5EPMg4Ql4/uVE1CVyI87TbftUQ0kCVWFI64bnzl3icz+eymb7lHPCV9b76hF/63dlm+FJTWX7ZFziL6zc8bSOtvoIeobl2vm56u8+ltYNZAzuqGQJAJsODuCFA/3Yfmw4J9ccyFpK8ge8ORayF3TNaY46xNHP3qn3KQUg130X5XuLYV5LDGFNsevUCOQveL6NO8R97shfdHKrF7Zi+xfW5zzPbrcl7nbkb52HGG2Ua9OQYnnkH17jOfcjIv+Ipjhsn1AVeBWaJf5pg6M+rGJMskTFSvh4OmNH/uVM9azOqp7BnfCtKfGXxVue0IqnMmiJhcBYVpi5h5vj7g8yBodu8JxOQRQqcy/+cbfD/b1ZPqsR3UMJOy1S4Gf71EW8xVBYSGFVmdDEWWdjBFs/e3Vekc1r+/g8r9iMG2H7PLq9B01RzZ6AztaUmd4aTH6b0ovIP6ypVRf5a6qCeNrcs7c+ojnE3xH5T8GEbzVu4G5P+AbwdGpK/GUfU47UUxkD9fUaoppqp31OJPJ3+6NikdbRgTi2HBnMqcsuBiDuIfNsK6vFLaLtDf6R/4/ffaFjL1ggGzG7a9sUQ6HoOn/knxXLuc1RpA1urkT2ab+brsYIwpqCVMbAey9bYkfYH7tmBfrHUni9NGdQSeTIXxapcq62rRSawmw7M6wpdkVXILsYcjyl49l9ZiXRkFZO26cKPX9Q5B8IdJ/IX5SmjYaUAuIv3bZEXzeMnMeKVaxfedjcDPvAl69zfLDF492fh7VL2vGLjUcd6ZmAWaTMi/qIinMXdHkcN/+tF59W
3kwQoLgJXwBoawjjwVsvQzyt56wA9qM+ouHBD67DY9t78F5rByrArIF/z99eXHqjy4zt+WvV6flnDG6XcI5oKtK6afeIyH/DgX68eLAfFyxqLWuHLPedAdTKkmAU+VeOt3//OZzWWY8vv/UcR1qmnOuvG2YUHgupGIBVC9/jtWSRNysfGmaUZHhH/oLhRMYxWZm1fZyfiBvOn4e5LVFcYm2mLXjH2oU4NhDH910bh/vlvZ85pwl3/M0FeM3pnZ73T4Z8kb+iMIRVBSndQERToSisaOEXrJjd5NiuMIiIaxB2ef7l9L8rhaYw6NbnWrN26RLbLcgp0Yvb63Hne9dO+P+bD7UaI/8Al3eY+ePUAgzF0/aiId0n24fbkX9W2Lwif3ltQN9oEmlL+N2bePe6dqYSq1qzr23+dmsFYwyXLu3I+aCEVMUza8drNyfB1atmT8kEaaHNukUOfLk29Q4ijto+cqpnNdg+qmIFNGbkL3+G5L2uv/LWs8sq/ABwzarZeO86c8RXDaMogBZ5VZSwpjg2nxBfVoftY3AoijON0WuRl6zxfVKH4l7o1eeK/N0pn36Rfz5CHnZLJb4ghTbrtle/lmlT7yCSPUd3nn8Qv+ITw17kpXOEVOf+vOMpHe31YXz5rWfjItfotBxcuqwD/3S1uWl9FVxKAFnRD+L5VL34h1TFFnrd4PaCIXnCV+ccCmOISdGqHPkn0jpOjSYdHYLI5MkYRq7t49qW8Pigsw6+yCSakPgHZAFRwchfUxy/qxH5HOX0zulc4TtViPIOGcOApio5GU+rF7biHWsX+jx78igBtklKQSR1BPF8qvcbahFWleyeowZH1FpwJNcrN6wdiZy2T/Y17vjjflz/nWcdHYLYwMMr1dNdbMs/8i/+PNzCUqkos9hsoGoWf2fkX12ef0hlyBgG0pbn7/5/T/VaBnE5q+BSAsiKfhDPp3q/oRam7WOKrc6zkX8q45XtI4m/pP69I0n0jSad4m9F/mmPCV837sjfKGF1pNtPLqY+/VRQSNTFvMisEtJMZwrZFb7uPP+Z/3VSrVTPtJ3t4zynqZ7XEJF/tXj+zONWUJj5n9YChFyRv5f4G4bZQ8d8PP+0bkZC8jFH5F9A/N2Rv26nehb/gQi7Iq5KiX+hyP9VayPzUsoazxT8a/sE7ws+UTRFsdeuaKq5Z7K8VWRois+x6mwfivwrR0RTkLKidKftY1b2TKR1GJxDVeDYmFte1p+yqnfKE7v5PH837tRP7pPtkw930bCJFGwrJ8XaORcubp3illQOR22fKvP8Namkc0hV8Nk3r8I33n5e9v6ptn0CXAWzFCjPv4LIKxR1g6NOivy/9eQ+fOOJPYiFVKgKcwxpZT0XK3jltQETifzd4m9vhDGBT4T7S7dqXmVy4QtF/u9/7VLs7B52FK2rNkSKY31Yc3TK1VDVU1WZvXI9pDLMa4khIW12NNW2D2MMjAUzUi6FIHdm1fsNtRDlAgDL9rEi/5Ru4JebjgAwd/JS8uT5i2whIfgA7C9ExuCOlcNeDCcySKR1+/Wzef4TyPaRvnTfe+cFeP2ZuSt7p4NCkf9t166YppZUjjnNMfzwpjVYt6wDO7qH7ePVYftY2T66YXdm4WleyKYwFkixLIXsIq8KN8SDmR+qFCCsZVM9My7PX47YzWwfyfZxiL+I/LMRkEgV5dyZOeRmtlWk7ZS0P6t434l8IOQv3fqzZlesjky1eLGT5cqVsxALuyZ8q8D2US3PP215/oCzwN10jG4UFsxIuRTEWQTxe1P14i9P+Bqc29F332gSaUn8FcYctWkM14Qv4B35m8f9q00utPZlla0fr5LOBc+jwqmTc6s4e2cyyHZcUNZiTAYxoZvM6Pb5hKa5bLUZ+U/520wL9gR2hdvhRW3YPiLy1w3EwuYH+btPv+p4nMKYY7s+L9tH9vzlcs3JPJtoz2+LYcNBp/hnUz2LP49KC8tvP3w5eoYThR9YY8iRcDWkJ6r2Ju26
3bHJ2T7TMaldXbaP+TuI51P94m8VGuOcw+BAWPWesFRd5R3k3RnFhK8c+cu33Yu6ZLwif72E8g6VziRpqw9XLL00yFRfqqd5DolMdsN2UcbCLI8yTbbPzB9EAZBTVyvcEA+q5BL7E1YVcJ7db9dPRBWFIar5p3oCQNIR+UsdgXTbHf3Nb/WyfUqo7VMFfnI1Iv4vpmDN/P+REHfdyvYBTL9aZMlNn+0z868lEOzIv/rF3xL0tG7AMPz/CarL9nEv8gKcEb7s88vi3xh1DqYaIhqaopq96Tsw+WwfIjiIzr4a0jwBuNYt5O7dPB2rmBWlmsQ/uJ5/dXxi8yBEM5UxzMjfJzpTGMO6pR22TSN7/rbt4+Pzy8fd4h8LqwipCjKSj1RabZ+q/1fNSIToV9qWKxeOnckcezeLyd+pP8/XndGJ8xe1TPn7TAfZbRyD9/moekWx93/NGDC4GVVcvTJ39yFFYWitD+Pzb14FwDvbJ+Uj+I7IP5LdtAWAvYBMXh0s+oGJfCCmelk9URpCLKthsheA7/4EIkV6OuY1vnnjarxl9fwpf5/pQGzjGEDtn5z4M8a+yhjbxRjbyhi7nzHWIt33CcbYPsbYbsbYNdLxCxhj26z7vsWmuEsUC1QS1gStpjDccdMax85aQHa1rWiNI9vHUmtZ5BM+E75NMWfkHw0p9sIZAUX+1YOIhKvFlnOsWFblyN8S/yo5z+kiu8K3su3wYrL/yccBnMU5PwfAHgCfAADG2EoANwJYBWA9gO8yxoShfjuA9wFYbv2sn2Qb8iIif7E3r4jQ3F9W8afwGh2LvDLWIi+fSV6n5+8R+avMUf+npM1cqsRWqDaqLvL3WbcwnbZPNWHX9gmg6z8p8eecP8Y5F3u7PQ9AjNWuB/BzznmSc34AwD4AaxljcwA0cc6f46a63gngLyfThkIIkXeLv7tMQbbutvnbc5FXxnthVyqP5x8NqVCZO/I3f08oz58irkAiIuVqseUcnr9H5F8NZaunE1tXAnjZytmk9wJ42Lo9D8AR6b6j1rF51m338SnDjvytzaeFveOOYMSH3rZ9DI9FXg7bx8/zz4r/m8+di9nNUbNG+iQj/2rIIa9GRKRcDaUdALftk+v5U+Q/MZjHraBQcJEXY+wJALM97voU5/wB6zGfApAB8DPxNI/H8zzH/d77fTAtIixcWNrWcUL8E4VsH5fnLzcqbWf7FLZ9RMnouc1RfOsdqwGYXyhdqv9TSp5/tdgK1Yb43FRNqmeByL9ADUPCRZDr+RcUf875lfnuZ4y9C8AbAbyeZ43yowAWSA+bD+C4dXy+x3G/974DwB0AsGbNmpI+duID7LZ93HuTigU6WdtnApG/dHtWYxQfeN1SvGV1dkCjKsxR+bOUPP8gpooRIie9ekZm8ghG8/D8E3nqWBG5iEsYxK/vZLN91gP4OIA3c87HpbseBHAjYyzCGFsCc2J3A+e8G8AIY+xiK8vnJgAPTKYNhYi4bR/fyN/8LQT5r3/wAj5072Zwnt2m0W/CV675oyoMH1+/AqfPanQcc6Z6TjzbhwgumqJUzcjMmeqZvS1snziJ/4QQE71BXLQ22bHqtwE0AnicMfYyY+x7AMA53w7gFwB2AHgEwK2cc/Gp+QCAH8KcBH4V2XmCKcFvwjes+kX+2WO/2XLcUa5ZFnzZw5ePey3xd3v+pWzjSAQXTWVVMyEvR/vyOQnbR05xJgqTzfYJHpMq7MY5X5bnvi8B+JLH8Y0AzprM+04EP88/x/bx2Ts0rXvn88vIPqjX7lxmnr+R8/gqCRZrHlVh1RP5S9G+XM1zuTWSndVUme1DZyrMR1eCQE1U9QSKz/Zxf4czcuSvF456vALAHNtHbONYJYJR62gKqx7PXzoPec3KDefPw9zmKC5Z2l6JZs1YsuUdKtsOL6pjrJoHMXQdtyJ/zWdFpurTQ6eKiPxlvLy9XPGH53sRMxNVUaqmto+8sKtJWrPCGMOlyzroMztBstk+wbtu
VS/+7glf8U9w2z7Z0qvO58sF2eRFXn54iYB/nn/BlyNmACGVVU2qpxz5N0Sr3hiYcpjrd5Co+v9uSHV6/prPhG/W9nF5/hmprn+Jkb+7tk8pef4AsLi9DmfNa57Qc4ipR1VY1UT+8nm4S5UQEye7wjd4n4+qF393bR/xT8it7eNdfc/L9gmpzHfTdi8fX1UUz6qeExX/p//ligk9npgeqtXzrw9773pHFE+Qs32qY6yaByH+4ylX5O+T7eMWZKftI8Tf/7J5ZfuoijlxvO3oEAA51bP48yCCi6ZWT56/7PmTvz95/LIIg0DVi39IVdAQ0extFAtF/sXYPvmiPK/hnaYo2N0zgjd9+0/YdWLYtn2qRTBqnQ+/fjnedeniSjejLFRLjaKgYEf+AbysVW/7AGZu8rHBOICscIc053/Dr+62bPuICV8xaoiF1JwVj96Rf/bY0Hi6pPIORHB587lzK92EslEt9lVQqOZ6/jOCOc0xO/IX4hxxr/D1GZ5lJPEXoi0yO+o8PFFvzz97LGNwyvYhAguJf3nJ7uEbvOtaE+I/qylq3/bfzMV7wjfpkeEjRg11kVzx9yvvIEjrBuX5E4GlWlJWg4L4hgexT62J//ScZg/xL1DVUzCWzMCNmBSrD+e6Zn7lHQRpnUupnkU1nyCmDfL8y4sSYNO/JsR/lof45+T5+9TdHvESf9Xf9vEKnBy2j27YaZ/k+RNBg2yf8uK3eDQI1IT4z/GyfYpM9RSRv/zPE7ZPfaS4yN9h+xicJnyJwELiX16UAHv+NZHtM6fFK/J3Zfv4bLogxL8urGHUui18UVHjXMavvIMgnTGyE7410fUSMwlVYbh8eQf+5uJFlW5KVRDkyL8mxH9ZV4N926+ks+oT+QvbJxpSbfEXllE0pOYUbfMr7yCQF41R5E8EDcYY7rr5oko3o2oQEX8Qv+o1EXtGtGyErhXI9nEL8qnRFACgtS5b50RE92FNQZ0r+vcr7yBI62T7EEStkHUUgvddr4nIHwBa6kIYHE/bf7snfLObLjif1zOcAAC0N4Sx96R5THQcEU1BQ1RzTAp7l3TO3v7T3j70jCSsx5Z2LgRBzAwo8g8A//D65QCAJqtSoXvC1y/P/+RwEk1RzTFSEBvBRDQVDa5J30KR/yPbT2Dz4UHrvQL4iSAIomxkV/gG77teM5H/u9ctwdsvXIiYlZ4ZKdLz7xlJoLU+7PDtRUcQ1pScjB8v8ffLoKDInyCqG6rqGRBiUl7+hYvb8Ok3rrQnfkVw7hb/wfE0WmIhR/QuFoRFNCUn199vJy8vqLAbQVQ3zCeoDAI1Jf4yIVXBzZctsWv8+O3hCwDNddnIX2GAYc3YRkK54l+oto8M2T4EUd0oPnOJQaBmxV8glrPnq7ttRv7Zx4ktGcOqgqg726dAqqcMBf4EUd2Ir3gQAz0Sf9ewzEuQW+uc4q/bkb+aa/sUKO/geGwAPxAEQZSP7Arf4FHz4i/8e788f8Bp+4DBjvxNz9854etVFdE/8g/iR4IgiHIR5BW+NS/+IvJX83hzTtsnW+M/oimOSWTAO/L327yZtJ8gqptsUc/gfdlJ/IWo+2T7ANkyDuJ+OfJ31/eZmOcfvA8EQRDlg+WxkysNib/L7pH1+K2r5wEAzpjd6O35a7mef6FFXoUeSxBE9WB/xQMY6JH45/H8/+LMLuz64npcsKjVjt6Zy/PPtX0o24cgCJN8iSSVpubF3738WhZ/lTE7lVNxRP6m5x/2WOTlZfv4e/4B/EQQBFE2uhoj+OerT8eVZ86qdFNyqJnyDn6I7ByvRV6yaGuOCd+s7TO/tc7xehMp70AQRHXDGMMH/2J5pZvhCUX+LtFnrsjfvm11ErLnH9YUXLi4DffcchHes26xfb8b8vYJgggaNS/+olinlzUjz9NmPf9sto+o63/psg57zwCK/AmCmAmQ+Lvy/GXkKF5xTPianr8s6nYn4qHzfp4/QRBEpah5z9+d7eN1H+AUet3y/OX7
rzijC/GU4TmJS5E/QRBBgyL/PGUdnJ6/eZtzjpsvPw0A0F4fse9fs7gNn3nTyrzvQRAEERRqPvLPl4crR/Eiejc4cPNlS3DzZUuKfg8vS4kgCKKSUORfpO2j2uLPJ/weYmKYIAgiKJRF/Blj/8wY44yxDunYJxhj+xhjuxlj10jHL2CMbbPu+xar8EonVWFgzHvBlbzxetb2KeU9ar6PJQgiYExalRhjCwBcBeCwdGwlgBsBrAKwHsB3GWNiKeztAN4HYLn1s36ybZgMqsJ8bRnF0/YpIfInz58giIBRjpD0GwA+BkBWxesB/JxznuScHwCwD8BaxtgcAE2c8+c45xzAnQD+sgxtKBmVMd9UTK9Uz1Iif6reSRBE0JiU+DPG3gzgGOd8i+uueQCOSH8ftY7Ns267j1cMRWG+RZe8Uj1Lifw5SugxCIIgppCC2T6MsScAzPa461MAPgngaq+neRzjeY77vff7YFpEWLhwYaGmloRWpO0jfPtSIv9SnkMQBDGVFBR/zvmVXscZY2cDWAJgizVZOh/AS4yxtTAj+gXSw+cDOG4dn+9x3O+97wBwBwCsWbNmSiRUUfLYPh7lHUqK/En8CYIIGCXbPpzzbZzzLs75Ys75YpjCfj7n/ASABwHcyBiLMMaWwJzY3cA57wYwwhi72MryuQnAA5M/jdJRGfNdhKWWyfPXSf0JgggYU7LIi3O+nTH2CwA7AGQA3Mo51627PwDgJwBiAB62fiqGqjDfCVmvks6l+Pft9eHSGkcQBDFFlC0B3RoB9El/f4lzvpRzfgbn/GHp+EbO+VnWfR+0sn4qRj7x9yrvYJTQ2gVtdXjio6/BitmNJbWRIAii3NT86qPWuhBa60Ke95Urzx8AlnU14s6b1+KjV51e0vMJgiDKSc2L/0euPB133XyR533yhO9kPH9BV2MUb1ld0cxWgiAIAFTYDQ0RDQ0R78vgV9J5MlCdH4IggkDNR/758PL8J/2aVOqBIIgAQOKfB7/9fCeDRkXeCIIIAKREeXDYPmWya8j2IQgiCJD450H1KO8wWajCJ0EQQYDEPw9e5R0mC3n+BEEEARL/PDhKOpfJ8w+R508QRAAgJcrDVHj+fkXkCIIgphMS/zwoU5DqSRAEEQRI/PMg6z1N1BIEUU2Q+OdBjvZpK0aCIKoJEv88yIu8KD+fIIhqgsS/SMjzJwiimiDxL5JylXcgCIIIAiT+RUI1eQiCqCZI0YpEJc+fIIgqgsS/SCjVkyCIaoLEv0go1ZMgiGqi5nfyKpZyRv733HIRGqPe+wYTBEFMByT+RVLOmjyXLuso22sRBEGUAtk+BEEQNQiJP0EQRA1C4k8QBFGDkPgTBEHUICT+BEEQNQhl+0yAf7nmDJw7v6XSzSAIgpg0JP4T4NYrllW6CQRBEGWBxN+Dn91yEfpGk5VuBkEQxJRB4u/BOlqERRBElUMTvgRBEDUIiT9BEEQNQuJPEARRg5D4EwRB1CAk/gRBEDUIiT9BEEQNQuJPEARRg5D4EwRB1CCMc17pNhQFY6wXwKESn94BoK+MzZkKqI3lgdpYPmZCO6mNhVnEOe90H5wx4j8ZGGMbOedrKt2OfFAbywO1sXzMhHZSG0uHbB+CIIgahMSfIAiiBqkV8b+j0g0oAmpjeaA2lo+Z0E5qY4nUhOdPEARBOKmVyJ8gCIKQIPEnCIKoQapa/Blj6xljuxlj+xhjt1W6PQLG2EHG2DbG2MuMsY3WsTbG2OOMsb3W79YKtOv/Y4ydZIy9Ih3zbRdj7BPWtd3NGLumgm38HGPsmHU9X2aMXVfhNi5gjD3FGNvJGNvOGPuIdTww1zJPGwNzLRljUcbYBsbYFquNn7eOB+k6+rUxMNfRF855Vf4AUAG8CuA0AGEAWwCsrHS7rLYdBNDhOvYfAG6zbt8G4N8r0K7XADgfwCuF2gVgpXVNIwCWWNdarVAbPwfgnz0eW6k2zgFwvnW7EcAeqy2BuZZ52hiYawmAAWiw
bocAvADg4oBdR782BuY6+v1Uc+S/FsA+zvl+znkKwM8BXF/hNuXjegA/tW7/FMBfTncDOOd/BNDvOuzXrusB/JxznuScHwCwD+Y1r0Qb/ahUG7s55y9Zt0cA7AQwDwG6lnna6Ecl2sg556PWnyHrhyNY19GvjX5U5DPpRTWL/zwAR6S/jyL/h3s64QAeY4xtYoy9zzo2i3PeDZhfTABdFWudE792Be36fpAxttWyhYQNUPE2MsYWA1gNMyIM5LV0tREI0LVkjKmMsZcBnATwOOc8cNfRp41AgK6jF9Us/szjWFDyWtdxzs8HcC2AWxljr6l0g0ogSNf3dgBLAZwHoBvA16zjFW0jY6wBwK8A/APnfDjfQz2OTUs7PdoYqGvJOdc55+cBmA9gLWPsrDwPD1IbA3Udvahm8T8KYIH093wAxyvUFgec8+PW75MA7oc57OthjM0BAOv3ycq10IFfuwJzfTnnPdYX0ADwA2SH0RVrI2MsBFNUf8Y5v886HKhr6dXGIF5Lq12DAJ4GsB4Bu45ebQzqdZSpZvF/EcByxtgSxlgYwI0AHqxwm8AYq2eMNYrbAK4G8ArMtr3Leti7ADxQmRbm4NeuBwHcyBiLMMaWAFgOYEMF2icEQPAWmNcTqFAbGWMMwI8A7OScf126KzDX0q+NQbqWjLFOxliLdTsG4EoAuxCs6+jZxiBdR18qMcs8XT8AroOZxfAqgE9Vuj1Wm06DOdu/BcB20S4A7QCeBLDX+t1WgbbdC3OImoYZodycr10APmVd290Arq1gG+8CsA3AVphfrjkVbuNlMIfyWwG8bP1cF6RrmaeNgbmWAM4BsNlqyysAPmMdD9J19GtjYK6j3w+VdyAIgqhBqtn2IQiCIHwg8ScIgqhBSPwJgiBqEBJ/giCIGoTEnyAIogYh8ScIgqhBSPwJgiBqkP8feciJoM7YQGcAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    \"\"\"Play one episode and return (episode_reward, elapsed_steps).\n",
    "\n",
    "    Args:\n",
    "        env: Gym environment using the old 4-tuple step API\n",
    "            (observation, reward, done, info).\n",
    "        agent: object exposing reset(mode), step(observation, reward, done)\n",
    "            -> action, and close().\n",
    "        max_episode_steps: optional hard cap on steps; when hit, the episode\n",
    "            is truncated WITHOUT delivering a terminal transition to the agent.\n",
    "        mode: forwarded to agent.reset(); 'train' presumably enables learning\n",
    "            inside agent.step() — confirm against the agent implementation.\n",
    "        render: if True, render the environment every step.\n",
    "    \"\"\"\n",
    "    observation, reward, done = env.reset(), 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    episode_reward, elapsed_steps = 0., 0\n",
    "    while True:\n",
    "        # agent receives the latest transition (reward/done refer to the\n",
    "        # step that produced `observation`) and returns the next action\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:  # terminal transition was already passed to the agent above\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        episode_reward += reward\n",
    "        elapsed_steps += 1\n",
    "        if max_episode_steps and elapsed_steps >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return episode_reward, elapsed_steps\n",
    "\n",
    "\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "for episode in itertools.count():\n",
    "    # Train on env.unwrapped (bypassing wrappers such as TimeLimit) and\n",
    "    # enforce the step cap manually via max_episode_steps instead.\n",
    "    # NOTE(review): env._max_episode_steps is a private gym attribute.\n",
    "    episode_reward, elapsed_steps = play_episode(env.unwrapped, agent,\n",
    "            max_episode_steps=env._max_episode_steps, mode='train')\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('train episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "    # stop once the mean of the last 10 episode rewards exceeds 250\n",
    "    # (stricter than the env's reward_threshold of 200 shown in the logs)\n",
    "    if np.mean(episode_rewards[-10:]) > 250:\n",
    "        break\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "logging.info('==== test ====')\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    # Evaluate on the wrapped env with the default mode (mode=None) —\n",
    "    # presumably this disables learning/exploration; confirm in the agent.\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "logging.info('average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
